2005-12-05 Jan Beulich <jbeulich@novell.com>
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blob4affcf4172835acf803245927f0bebabf8c6ffa6
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301, USA. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
91 #include "langhooks.h"
/* The infinite cost.  Used as the cost of an impossible candidate/use pair
   so that it is never selected by the cost-minimization search.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5
101 /* Representation of the induction variable. */
102 struct iv
104 tree base; /* Initial value of the iv. */
105 tree base_object; /* A memory object to that the induction variable points. */
106 tree step; /* Step of the iv (constant only). */
107 tree ssa_name; /* The ssa name with the value. */
108 bool biv_p; /* Is it a biv? */
109 bool have_use_for; /* Do we already have a use for it? */
110 unsigned use_id; /* The identifier in the use if it is the case. */
113 /* Per-ssa version information (induction variable descriptions, etc.). */
114 struct version_info
116 tree name; /* The ssa name. */
117 struct iv *iv; /* Induction variable description. */
118 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id; /* Id of an invariant. */
121 bool preserve_biv; /* For the original biv, whether to preserve it. */
/* Information attached to loop.  */
struct loop_data
{
  unsigned regs_used;	/* Number of registers used.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_OUTER,		/* The induction variable is used outside the loop.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
139 /* The candidate - cost pair. */
140 struct cost_pair
142 struct iv_cand *cand; /* The candidate. */
143 unsigned cost; /* The cost. */
144 bitmap depends_on; /* The list of invariants that have to be
145 preserved. */
146 tree value; /* For final value elimination, the expression for
147 the final value of the iv. For iv elimination,
148 the new bound to compare with. */
151 /* Use. */
152 struct iv_use
154 unsigned id; /* The id of the use. */
155 enum use_type type; /* Type of the use. */
156 struct iv *iv; /* The induction variable it is based on. */
157 tree stmt; /* Statement in that it occurs. */
158 tree *op_p; /* The place where it occurs. */
159 bitmap related_cands; /* The set of "related" iv candidates, plus the common
160 important ones. */
162 unsigned n_map_members; /* Number of candidates in the cost_map list. */
163 struct cost_pair *cost_map;
164 /* The costs wrto the iv candidates. */
166 struct iv_cand *selected;
167 /* The selected candidate. */
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_ORIGINAL		/* The original biv.  */
};
178 /* The induction variable candidate. */
179 struct iv_cand
181 unsigned id; /* The number of the candidate. */
182 bool important; /* Whether this is an "important" candidate, i.e. such
183 that it should be considered by all uses. */
184 enum iv_position pos; /* Where it is computed. */
185 tree incremented_at; /* For original biv, the statement where it is
186 incremented. */
187 tree var_before; /* The variable used for it before increment. */
188 tree var_after; /* The variable used for it after increment. */
189 struct iv *iv; /* The value of the candidate. NULL for
190 "pseudocandidate" used to indicate the possibility
191 to replace the final value of an iv by direct
192 computation of the value. */
193 unsigned cost; /* Cost of the candidate. */
194 bitmap depends_on; /* The list of invariants that are used in step of the
195 biv. */
/* The data used by the induction variable optimizations.  */

/* Heap-allocated growable vectors of use and candidate pointers.  */
typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
208 struct ivopts_data
210 /* The currently optimized loop. */
211 struct loop *current_loop;
213 /* Numbers of iterations for all exits of the current loop. */
214 htab_t niters;
216 /* The size of version_info array allocated. */
217 unsigned version_info_size;
219 /* The array of information for the ssa names. */
220 struct version_info *version_info;
222 /* The bitmap of indices in version_info whose value was changed. */
223 bitmap relevant;
225 /* The maximum invariant id. */
226 unsigned max_inv_id;
228 /* The uses of induction variables. */
229 VEC(iv_use_p,heap) *iv_uses;
231 /* The candidates. */
232 VEC(iv_cand_p,heap) *iv_candidates;
234 /* A bitmap of important candidates. */
235 bitmap important_candidates;
237 /* Whether to consider just related and important candidates when replacing a
238 use. */
239 bool consider_all_candidates;
242 /* An assignment of iv candidates to uses. */
244 struct iv_ca
246 /* The number of uses covered by the assignment. */
247 unsigned upto;
249 /* Number of uses that cannot be expressed by the candidates in the set. */
250 unsigned bad_uses;
252 /* Candidate assigned to a use, together with the related costs. */
253 struct cost_pair **cand_for_use;
255 /* Number of times each candidate is used. */
256 unsigned *n_cand_uses;
258 /* The candidates used. */
259 bitmap cands;
261 /* The number of candidates in the set. */
262 unsigned n_cands;
264 /* Total number of registers needed. */
265 unsigned n_regs;
267 /* Total cost of expressing uses. */
268 unsigned cand_use_cost;
270 /* Total cost of candidates. */
271 unsigned cand_cost;
273 /* Number of times each invariant is used. */
274 unsigned *n_invariant_uses;
276 /* Total cost of the assignment. */
277 unsigned cost;
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below that all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for that the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
319 /* Number of uses recorded in DATA. */
321 static inline unsigned
322 n_iv_uses (struct ivopts_data *data)
324 return VEC_length (iv_use_p, data->iv_uses);
327 /* Ith use recorded in DATA. */
329 static inline struct iv_use *
330 iv_use (struct ivopts_data *data, unsigned i)
332 return VEC_index (iv_use_p, data->iv_uses, i);
335 /* Number of candidates recorded in DATA. */
337 static inline unsigned
338 n_iv_cands (struct ivopts_data *data)
340 return VEC_length (iv_cand_p, data->iv_candidates);
343 /* Ith candidate recorded in DATA. */
345 static inline struct iv_cand *
346 iv_cand (struct ivopts_data *data, unsigned i)
348 return VEC_index (iv_cand_p, data->iv_candidates, i);
351 /* The data for LOOP. */
353 static inline struct loop_data *
354 loop_data (struct loop *loop)
356 return loop->aux;
359 /* The single loop exit if it dominates the latch, NULL otherwise. */
361 edge
362 single_dom_exit (struct loop *loop)
364 edge exit = loop->single_exit;
366 if (!exit)
367 return NULL;
369 if (!just_once_each_iteration_p (loop, exit->src))
370 return NULL;
372 return exit;
375 /* Dumps information about the induction variable IV to FILE. */
377 extern void dump_iv (FILE *, struct iv *);
378 void
379 dump_iv (FILE *file, struct iv *iv)
381 if (iv->ssa_name)
383 fprintf (file, "ssa name ");
384 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
385 fprintf (file, "\n");
388 fprintf (file, " type ");
389 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
390 fprintf (file, "\n");
392 if (iv->step)
394 fprintf (file, " base ");
395 print_generic_expr (file, iv->base, TDF_SLIM);
396 fprintf (file, "\n");
398 fprintf (file, " step ");
399 print_generic_expr (file, iv->step, TDF_SLIM);
400 fprintf (file, "\n");
402 else
404 fprintf (file, " invariant ");
405 print_generic_expr (file, iv->base, TDF_SLIM);
406 fprintf (file, "\n");
409 if (iv->base_object)
411 fprintf (file, " base object ");
412 print_generic_expr (file, iv->base_object, TDF_SLIM);
413 fprintf (file, "\n");
416 if (iv->biv_p)
417 fprintf (file, " is a biv\n");
420 /* Dumps information about the USE to FILE. */
422 extern void dump_use (FILE *, struct iv_use *);
423 void
424 dump_use (FILE *file, struct iv_use *use)
426 fprintf (file, "use %d\n", use->id);
428 switch (use->type)
430 case USE_NONLINEAR_EXPR:
431 fprintf (file, " generic\n");
432 break;
434 case USE_OUTER:
435 fprintf (file, " outside\n");
436 break;
438 case USE_ADDRESS:
439 fprintf (file, " address\n");
440 break;
442 case USE_COMPARE:
443 fprintf (file, " compare\n");
444 break;
446 default:
447 gcc_unreachable ();
450 fprintf (file, " in statement ");
451 print_generic_expr (file, use->stmt, TDF_SLIM);
452 fprintf (file, "\n");
454 fprintf (file, " at position ");
455 if (use->op_p)
456 print_generic_expr (file, *use->op_p, TDF_SLIM);
457 fprintf (file, "\n");
459 dump_iv (file, use->iv);
461 if (use->related_cands)
463 fprintf (file, " related candidates ");
464 dump_bitmap (file, use->related_cands);
468 /* Dumps information about the uses to FILE. */
470 extern void dump_uses (FILE *, struct ivopts_data *);
471 void
472 dump_uses (FILE *file, struct ivopts_data *data)
474 unsigned i;
475 struct iv_use *use;
477 for (i = 0; i < n_iv_uses (data); i++)
479 use = iv_use (data, i);
481 dump_use (file, use);
482 fprintf (file, "\n");
486 /* Dumps information about induction variable candidate CAND to FILE. */
488 extern void dump_cand (FILE *, struct iv_cand *);
489 void
490 dump_cand (FILE *file, struct iv_cand *cand)
492 struct iv *iv = cand->iv;
494 fprintf (file, "candidate %d%s\n",
495 cand->id, cand->important ? " (important)" : "");
497 if (cand->depends_on)
499 fprintf (file, " depends on ");
500 dump_bitmap (file, cand->depends_on);
503 if (!iv)
505 fprintf (file, " final value replacement\n");
506 return;
509 switch (cand->pos)
511 case IP_NORMAL:
512 fprintf (file, " incremented before exit test\n");
513 break;
515 case IP_END:
516 fprintf (file, " incremented at end\n");
517 break;
519 case IP_ORIGINAL:
520 fprintf (file, " original biv\n");
521 break;
524 dump_iv (file, iv);
527 /* Returns the info for ssa version VER. */
529 static inline struct version_info *
530 ver_info (struct ivopts_data *data, unsigned ver)
532 return data->version_info + ver;
535 /* Returns the info for ssa name NAME. */
537 static inline struct version_info *
538 name_info (struct ivopts_data *data, tree name)
540 return ver_info (data, SSA_NAME_VERSION (name));
543 /* Checks whether there exists number X such that X * B = A, counting modulo
544 2^BITS. */
546 static bool
547 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
548 HOST_WIDE_INT *x)
550 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
551 unsigned HOST_WIDE_INT inv, ex, val;
552 unsigned i;
554 a &= mask;
555 b &= mask;
557 /* First divide the whole equation by 2 as long as possible. */
558 while (!(a & 1) && !(b & 1))
560 a >>= 1;
561 b >>= 1;
562 bits--;
563 mask >>= 1;
566 if (!(b & 1))
568 /* If b is still even, a is odd and there is no such x. */
569 return false;
572 /* Find the inverse of b. We compute it as
573 b^(2^(bits - 1) - 1) (mod 2^bits). */
574 inv = 1;
575 ex = b;
576 for (i = 0; i < bits - 1; i++)
578 inv = (inv * ex) & mask;
579 ex = (ex * ex) & mask;
582 val = (a * inv) & mask;
584 gcc_assert (((val * b) & mask) == a);
586 if ((val >> (bits - 1)) & 1)
587 val |= ~mask;
589 *x = val;
591 return true;
594 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
595 emitted in LOOP. */
597 static bool
598 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
600 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
602 gcc_assert (bb);
604 if (sbb == loop->latch)
605 return true;
607 if (sbb != bb)
608 return false;
610 return stmt == last_stmt (bb);
613 /* Returns true if STMT if after the place where the original induction
614 variable CAND is incremented. */
616 static bool
617 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
619 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
620 basic_block stmt_bb = bb_for_stmt (stmt);
621 block_stmt_iterator bsi;
623 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
624 return false;
626 if (stmt_bb != cand_bb)
627 return true;
629 /* Scan the block from the end, since the original ivs are usually
630 incremented at the end of the loop body. */
631 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
633 if (bsi_stmt (bsi) == cand->incremented_at)
634 return false;
635 if (bsi_stmt (bsi) == stmt)
636 return true;
640 /* Returns true if STMT if after the place where the induction variable
641 CAND is incremented in LOOP. */
643 static bool
644 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
646 switch (cand->pos)
648 case IP_END:
649 return false;
651 case IP_NORMAL:
652 return stmt_after_ip_normal_pos (loop, stmt);
654 case IP_ORIGINAL:
655 return stmt_after_ip_original_pos (cand, stmt);
657 default:
658 gcc_unreachable ();
662 /* Element of the table in that we cache the numbers of iterations obtained
663 from exits of the loop. */
665 struct nfe_cache_elt
667 /* The edge for that the number of iterations is cached. */
668 edge exit;
670 /* True if the # of iterations was successfully determined. */
671 bool valid_p;
673 /* Description of # of iterations. */
674 struct tree_niter_desc niter;
677 /* Hash function for nfe_cache_elt E. */
679 static hashval_t
680 nfe_hash (const void *e)
682 const struct nfe_cache_elt *elt = e;
684 return htab_hash_pointer (elt->exit);
687 /* Equality function for nfe_cache_elt E1 and edge E2. */
689 static int
690 nfe_eq (const void *e1, const void *e2)
692 const struct nfe_cache_elt *elt1 = e1;
694 return elt1->exit == e2;
697 /* Returns structure describing number of iterations determined from
698 EXIT of DATA->current_loop, or NULL if something goes wrong. */
700 static struct tree_niter_desc *
701 niter_for_exit (struct ivopts_data *data, edge exit)
703 struct nfe_cache_elt *nfe_desc;
704 PTR *slot;
706 slot = htab_find_slot_with_hash (data->niters, exit,
707 htab_hash_pointer (exit),
708 INSERT);
710 if (!*slot)
712 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
713 nfe_desc->exit = exit;
714 nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
715 exit, &nfe_desc->niter,
716 true);
717 *slot = nfe_desc;
719 else
720 nfe_desc = *slot;
722 if (!nfe_desc->valid_p)
723 return NULL;
725 return &nfe_desc->niter;
728 /* Returns structure describing number of iterations determined from
729 single dominating exit of DATA->current_loop, or NULL if something
730 goes wrong. */
732 static struct tree_niter_desc *
733 niter_for_single_dom_exit (struct ivopts_data *data)
735 edge exit = single_dom_exit (data->current_loop);
737 if (!exit)
738 return NULL;
740 return niter_for_exit (data, exit);
743 /* Initializes data structures used by the iv optimization pass, stored
744 in DATA. LOOPS is the loop tree. */
746 static void
747 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
749 unsigned i;
751 data->version_info_size = 2 * num_ssa_names;
752 data->version_info = xcalloc (data->version_info_size,
753 sizeof (struct version_info));
754 data->relevant = BITMAP_ALLOC (NULL);
755 data->important_candidates = BITMAP_ALLOC (NULL);
756 data->max_inv_id = 0;
757 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
759 for (i = 1; i < loops->num; i++)
760 if (loops->parray[i])
761 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
763 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
764 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
765 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
768 /* Returns a memory object to that EXPR points. In case we are able to
769 determine that it does not point to any such object, NULL is returned. */
771 static tree
772 determine_base_object (tree expr)
774 enum tree_code code = TREE_CODE (expr);
775 tree base, obj, op0, op1;
777 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
778 return NULL_TREE;
780 switch (code)
782 case INTEGER_CST:
783 return NULL_TREE;
785 case ADDR_EXPR:
786 obj = TREE_OPERAND (expr, 0);
787 base = get_base_address (obj);
789 if (!base)
790 return expr;
792 if (TREE_CODE (base) == INDIRECT_REF)
793 return determine_base_object (TREE_OPERAND (base, 0));
795 return fold_convert (ptr_type_node,
796 build_fold_addr_expr (base));
798 case PLUS_EXPR:
799 case MINUS_EXPR:
800 op0 = determine_base_object (TREE_OPERAND (expr, 0));
801 op1 = determine_base_object (TREE_OPERAND (expr, 1));
803 if (!op1)
804 return op0;
806 if (!op0)
807 return (code == PLUS_EXPR
808 ? op1
809 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
811 return fold_build2 (code, ptr_type_node, op0, op1);
813 case NOP_EXPR:
814 case CONVERT_EXPR:
815 return determine_base_object (TREE_OPERAND (expr, 0));
817 default:
818 return fold_convert (ptr_type_node, expr);
822 /* Allocates an induction variable with given initial value BASE and step STEP
823 for loop LOOP. */
825 static struct iv *
826 alloc_iv (tree base, tree step)
828 struct iv *iv = xcalloc (1, sizeof (struct iv));
830 if (step && integer_zerop (step))
831 step = NULL_TREE;
833 iv->base = base;
834 iv->base_object = determine_base_object (base);
835 iv->step = step;
836 iv->biv_p = false;
837 iv->have_use_for = false;
838 iv->use_id = 0;
839 iv->ssa_name = NULL_TREE;
841 return iv;
844 /* Sets STEP and BASE for induction variable IV. */
846 static void
847 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
849 struct version_info *info = name_info (data, iv);
851 gcc_assert (!info->iv);
853 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
854 info->iv = alloc_iv (base, step);
855 info->iv->ssa_name = iv;
858 /* Finds induction variable declaration for VAR. */
860 static struct iv *
861 get_iv (struct ivopts_data *data, tree var)
863 basic_block bb;
865 if (!name_info (data, var)->iv)
867 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
869 if (!bb
870 || !flow_bb_inside_loop_p (data->current_loop, bb))
871 set_iv (data, var, var, NULL_TREE);
874 return name_info (data, var)->iv;
877 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
878 not define a simple affine biv with nonzero step. */
880 static tree
881 determine_biv_step (tree phi)
883 struct loop *loop = bb_for_stmt (phi)->loop_father;
884 tree name = PHI_RESULT (phi), base, step;
886 if (!is_gimple_reg (name))
887 return NULL_TREE;
889 if (!simple_iv (loop, phi, name, &base, &step, true))
890 return NULL_TREE;
892 if (zero_p (step))
893 return NULL_TREE;
895 return step;
898 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
900 static bool
901 abnormal_ssa_name_p (tree exp)
903 if (!exp)
904 return false;
906 if (TREE_CODE (exp) != SSA_NAME)
907 return false;
909 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
912 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
913 abnormal phi node. Callback for for_each_index. */
915 static bool
916 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
917 void *data ATTRIBUTE_UNUSED)
919 if (TREE_CODE (base) == ARRAY_REF)
921 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
922 return false;
923 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
924 return false;
927 return !abnormal_ssa_name_p (*index);
930 /* Returns true if EXPR contains a ssa name that occurs in an
931 abnormal phi node. */
933 static bool
934 contains_abnormal_ssa_name_p (tree expr)
936 enum tree_code code;
937 enum tree_code_class class;
939 if (!expr)
940 return false;
942 code = TREE_CODE (expr);
943 class = TREE_CODE_CLASS (code);
945 if (code == SSA_NAME)
946 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
948 if (code == INTEGER_CST
949 || is_gimple_min_invariant (expr))
950 return false;
952 if (code == ADDR_EXPR)
953 return !for_each_index (&TREE_OPERAND (expr, 0),
954 idx_contains_abnormal_ssa_name_p,
955 NULL);
957 switch (class)
959 case tcc_binary:
960 case tcc_comparison:
961 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
962 return true;
964 /* Fallthru. */
965 case tcc_unary:
966 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
967 return true;
969 break;
971 default:
972 gcc_unreachable ();
975 return false;
978 /* Finds basic ivs. */
980 static bool
981 find_bivs (struct ivopts_data *data)
983 tree phi, step, type, base;
984 bool found = false;
985 struct loop *loop = data->current_loop;
987 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
989 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
990 continue;
992 step = determine_biv_step (phi);
993 if (!step)
994 continue;
996 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
997 base = expand_simple_operations (base);
998 if (contains_abnormal_ssa_name_p (base)
999 || contains_abnormal_ssa_name_p (step))
1000 continue;
1002 type = TREE_TYPE (PHI_RESULT (phi));
1003 base = fold_convert (type, base);
1004 if (step)
1005 step = fold_convert (type, step);
1007 set_iv (data, PHI_RESULT (phi), base, step);
1008 found = true;
1011 return found;
1014 /* Marks basic ivs. */
1016 static void
1017 mark_bivs (struct ivopts_data *data)
1019 tree phi, var;
1020 struct iv *iv, *incr_iv;
1021 struct loop *loop = data->current_loop;
1022 basic_block incr_bb;
1024 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1026 iv = get_iv (data, PHI_RESULT (phi));
1027 if (!iv)
1028 continue;
1030 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1031 incr_iv = get_iv (data, var);
1032 if (!incr_iv)
1033 continue;
1035 /* If the increment is in the subloop, ignore it. */
1036 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1037 if (incr_bb->loop_father != data->current_loop
1038 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1039 continue;
1041 iv->biv_p = true;
1042 incr_iv->biv_p = true;
1046 /* Checks whether STMT defines a linear induction variable and stores its
1047 parameters to BASE and STEP. */
1049 static bool
1050 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1051 tree *base, tree *step)
1053 tree lhs;
1054 struct loop *loop = data->current_loop;
1056 *base = NULL_TREE;
1057 *step = NULL_TREE;
1059 if (TREE_CODE (stmt) != MODIFY_EXPR)
1060 return false;
1062 lhs = TREE_OPERAND (stmt, 0);
1063 if (TREE_CODE (lhs) != SSA_NAME)
1064 return false;
1066 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step, true))
1067 return false;
1068 *base = expand_simple_operations (*base);
1070 if (contains_abnormal_ssa_name_p (*base)
1071 || contains_abnormal_ssa_name_p (*step))
1072 return false;
1074 return true;
1077 /* Finds general ivs in statement STMT. */
1079 static void
1080 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1082 tree base, step;
1084 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1085 return;
1087 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1090 /* Finds general ivs in basic block BB. */
1092 static void
1093 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1095 block_stmt_iterator bsi;
1097 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1098 find_givs_in_stmt (data, bsi_stmt (bsi));
1101 /* Finds general ivs. */
1103 static void
1104 find_givs (struct ivopts_data *data)
1106 struct loop *loop = data->current_loop;
1107 basic_block *body = get_loop_body_in_dom_order (loop);
1108 unsigned i;
1110 for (i = 0; i < loop->num_nodes; i++)
1111 find_givs_in_bb (data, body[i]);
1112 free (body);
1115 /* For each ssa name defined in LOOP determines whether it is an induction
1116 variable and if so, its initial value and step. */
1118 static bool
1119 find_induction_variables (struct ivopts_data *data)
1121 unsigned i;
1122 bitmap_iterator bi;
1124 if (!find_bivs (data))
1125 return false;
1127 find_givs (data);
1128 mark_bivs (data);
1130 if (dump_file && (dump_flags & TDF_DETAILS))
1132 struct tree_niter_desc *niter;
1134 niter = niter_for_single_dom_exit (data);
1136 if (niter)
1138 fprintf (dump_file, " number of iterations ");
1139 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1140 fprintf (dump_file, "\n");
1142 fprintf (dump_file, " may be zero if ");
1143 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1144 fprintf (dump_file, "\n");
1145 fprintf (dump_file, "\n");
1148 fprintf (dump_file, "Induction variables:\n\n");
1150 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1152 if (ver_info (data, i)->iv)
1153 dump_iv (dump_file, ver_info (data, i)->iv);
1157 return true;
1160 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1162 static struct iv_use *
1163 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1164 tree stmt, enum use_type use_type)
1166 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1168 use->id = n_iv_uses (data);
1169 use->type = use_type;
1170 use->iv = iv;
1171 use->stmt = stmt;
1172 use->op_p = use_p;
1173 use->related_cands = BITMAP_ALLOC (NULL);
1175 /* To avoid showing ssa name in the dumps, if it was not reset by the
1176 caller. */
1177 iv->ssa_name = NULL_TREE;
1179 if (dump_file && (dump_flags & TDF_DETAILS))
1180 dump_use (dump_file, use);
1182 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1184 return use;
1187 /* Checks whether OP is a loop-level invariant and if so, records it.
1188 NONLINEAR_USE is true if the invariant is used in a way we do not
1189 handle specially. */
1191 static void
1192 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1194 basic_block bb;
1195 struct version_info *info;
1197 if (TREE_CODE (op) != SSA_NAME
1198 || !is_gimple_reg (op))
1199 return;
1201 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1202 if (bb
1203 && flow_bb_inside_loop_p (data->current_loop, bb))
1204 return;
1206 info = name_info (data, op);
1207 info->name = op;
1208 info->has_nonlin_use |= nonlinear_use;
1209 if (!info->inv_id)
1210 info->inv_id = ++data->max_inv_id;
1211 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1214 /* Checks whether the use OP is interesting and if so, records it
1215 as TYPE. */
1217 static struct iv_use *
1218 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1219 enum use_type type)
1221 struct iv *iv;
1222 struct iv *civ;
1223 tree stmt;
1224 struct iv_use *use;
1226 if (TREE_CODE (op) != SSA_NAME)
1227 return NULL;
1229 iv = get_iv (data, op);
1230 if (!iv)
1231 return NULL;
1233 if (iv->have_use_for)
1235 use = iv_use (data, iv->use_id);
1237 gcc_assert (use->type == USE_NONLINEAR_EXPR
1238 || use->type == USE_OUTER);
1240 if (type == USE_NONLINEAR_EXPR)
1241 use->type = USE_NONLINEAR_EXPR;
1242 return use;
1245 if (zero_p (iv->step))
1247 record_invariant (data, op, true);
1248 return NULL;
1250 iv->have_use_for = true;
1252 civ = xmalloc (sizeof (struct iv));
1253 *civ = *iv;
1255 stmt = SSA_NAME_DEF_STMT (op);
1256 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1257 || TREE_CODE (stmt) == MODIFY_EXPR);
1259 use = record_use (data, NULL, civ, stmt, type);
1260 iv->use_id = use->id;
1262 return use;
1265 /* Checks whether the use OP is interesting and if so, records it. */
1267 static struct iv_use *
1268 find_interesting_uses_op (struct ivopts_data *data, tree op)
1270 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1273 /* Records a definition of induction variable OP that is used outside of the
1274 loop. */
1276 static struct iv_use *
1277 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1279 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1282 /* Checks whether the condition *COND_P in STMT is interesting
1283 and if so, records it. */
1285 static void
1286 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1288 tree *op0_p;
1289 tree *op1_p;
1290 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1291 struct iv const_iv;
1292 tree zero = integer_zero_node;
1294 const_iv.step = NULL_TREE;
1296 if (TREE_CODE (*cond_p) != SSA_NAME
1297 && !COMPARISON_CLASS_P (*cond_p))
1298 return;
1300 if (TREE_CODE (*cond_p) == SSA_NAME)
1302 op0_p = cond_p;
1303 op1_p = &zero;
1305 else
1307 op0_p = &TREE_OPERAND (*cond_p, 0);
1308 op1_p = &TREE_OPERAND (*cond_p, 1);
1311 if (TREE_CODE (*op0_p) == SSA_NAME)
1312 iv0 = get_iv (data, *op0_p);
1313 else
1314 iv0 = &const_iv;
1316 if (TREE_CODE (*op1_p) == SSA_NAME)
1317 iv1 = get_iv (data, *op1_p);
1318 else
1319 iv1 = &const_iv;
1321 if (/* When comparing with non-invariant value, we may not do any senseful
1322 induction variable elimination. */
1323 (!iv0 || !iv1)
1324 /* Eliminating condition based on two ivs would be nontrivial.
1325 ??? TODO -- it is not really important to handle this case. */
1326 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1328 find_interesting_uses_op (data, *op0_p);
1329 find_interesting_uses_op (data, *op1_p);
1330 return;
1333 if (zero_p (iv0->step) && zero_p (iv1->step))
1335 /* If both are invariants, this is a work for unswitching. */
1336 return;
1339 civ = xmalloc (sizeof (struct iv));
1340 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1341 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1344 /* Returns true if expression EXPR is obviously invariant in LOOP,
1345 i.e. if all its operands are defined outside of the LOOP. */
1347 bool
1348 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1350 basic_block def_bb;
1351 unsigned i, len;
1353 if (is_gimple_min_invariant (expr))
1354 return true;
1356 if (TREE_CODE (expr) == SSA_NAME)
1358 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1359 if (def_bb
1360 && flow_bb_inside_loop_p (loop, def_bb))
1361 return false;
1363 return true;
1366 if (!EXPR_P (expr))
1367 return false;
1369 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1370 for (i = 0; i < len; i++)
1371 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1372 return false;
1374 return true;
1377 /* Cumulates the steps of indices into DATA and replaces their values with the
1378 initial ones. Returns false when the value of the index cannot be determined.
1379 Callback for for_each_index. */
1381 struct ifs_ivopts_data
1383 struct ivopts_data *ivopts_data;
1384 tree stmt;
1385 tree *step_p;
1388 static bool
1389 idx_find_step (tree base, tree *idx, void *data)
1391 struct ifs_ivopts_data *dta = data;
1392 struct iv *iv;
1393 tree step, iv_step, lbound, off;
1394 struct loop *loop = dta->ivopts_data->current_loop;
1396 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1397 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1398 return false;
1400 /* If base is a component ref, require that the offset of the reference
1401 be invariant. */
1402 if (TREE_CODE (base) == COMPONENT_REF)
1404 off = component_ref_field_offset (base);
1405 return expr_invariant_in_loop_p (loop, off);
1408 /* If base is array, first check whether we will be able to move the
1409 reference out of the loop (in order to take its address in strength
1410 reduction). In order for this to work we need both lower bound
1411 and step to be loop invariants. */
1412 if (TREE_CODE (base) == ARRAY_REF)
1414 step = array_ref_element_size (base);
1415 lbound = array_ref_low_bound (base);
1417 if (!expr_invariant_in_loop_p (loop, step)
1418 || !expr_invariant_in_loop_p (loop, lbound))
1419 return false;
1422 if (TREE_CODE (*idx) != SSA_NAME)
1423 return true;
1425 iv = get_iv (dta->ivopts_data, *idx);
1426 if (!iv)
1427 return false;
1429 *idx = iv->base;
1431 if (!iv->step)
1432 return true;
1434 if (TREE_CODE (base) == ARRAY_REF)
1436 step = array_ref_element_size (base);
1438 /* We only handle addresses whose step is an integer constant. */
1439 if (TREE_CODE (step) != INTEGER_CST)
1440 return false;
1442 else
1443 /* The step for pointer arithmetics already is 1 byte. */
1444 step = build_int_cst (sizetype, 1);
1446 /* FIXME: convert_step should not be used outside chrec_convert: fix
1447 this by calling chrec_convert. */
1448 iv_step = convert_step (dta->ivopts_data->current_loop,
1449 sizetype, iv->base, iv->step, dta->stmt);
1451 if (!iv_step)
1453 /* The index might wrap. */
1454 return false;
1457 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1459 if (!*dta->step_p)
1460 *dta->step_p = step;
1461 else
1462 *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1464 return true;
1467 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1468 object is passed to it in DATA. */
1470 static bool
1471 idx_record_use (tree base, tree *idx,
1472 void *data)
1474 find_interesting_uses_op (data, *idx);
1475 if (TREE_CODE (base) == ARRAY_REF)
1477 find_interesting_uses_op (data, array_ref_element_size (base));
1478 find_interesting_uses_op (data, array_ref_low_bound (base));
1480 return true;
1483 /* Returns true if memory reference REF may be unaligned. */
1485 static bool
1486 may_be_unaligned_p (tree ref)
1488 tree base;
1489 tree base_type;
1490 HOST_WIDE_INT bitsize;
1491 HOST_WIDE_INT bitpos;
1492 tree toffset;
1493 enum machine_mode mode;
1494 int unsignedp, volatilep;
1495 unsigned base_align;
1497 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1498 thus they are not misaligned. */
1499 if (TREE_CODE (ref) == TARGET_MEM_REF)
1500 return false;
1502 /* The test below is basically copy of what expr.c:normal_inner_ref
1503 does to check whether the object must be loaded by parts when
1504 STRICT_ALIGNMENT is true. */
1505 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1506 &unsignedp, &volatilep, true);
1507 base_type = TREE_TYPE (base);
1508 base_align = TYPE_ALIGN (base_type);
1510 if (mode != BLKmode
1511 && (base_align < GET_MODE_ALIGNMENT (mode)
1512 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1513 || bitpos % BITS_PER_UNIT != 0))
1514 return true;
1516 return false;
1519 /* Finds addresses in *OP_P inside STMT. */
1521 static void
1522 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1524 tree base = *op_p, step = NULL;
1525 struct iv *civ;
1526 struct ifs_ivopts_data ifs_ivopts_data;
1528 /* Do not play with volatile memory references. A bit too conservative,
1529 perhaps, but safe. */
1530 if (stmt_ann (stmt)->has_volatile_ops)
1531 goto fail;
1533 /* Ignore bitfields for now. Not really something terribly complicated
1534 to handle. TODO. */
1535 if (TREE_CODE (base) == COMPONENT_REF
1536 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1537 goto fail;
1539 if (STRICT_ALIGNMENT
1540 && may_be_unaligned_p (base))
1541 goto fail;
1543 base = unshare_expr (base);
1545 if (TREE_CODE (base) == TARGET_MEM_REF)
1547 tree type = build_pointer_type (TREE_TYPE (base));
1548 tree astep;
1550 if (TMR_BASE (base)
1551 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1553 civ = get_iv (data, TMR_BASE (base));
1554 if (!civ)
1555 goto fail;
1557 TMR_BASE (base) = civ->base;
1558 step = civ->step;
1560 if (TMR_INDEX (base)
1561 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1563 civ = get_iv (data, TMR_INDEX (base));
1564 if (!civ)
1565 goto fail;
1567 TMR_INDEX (base) = civ->base;
1568 astep = civ->step;
1570 if (astep)
1572 if (TMR_STEP (base))
1573 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1575 if (step)
1576 step = fold_build2 (PLUS_EXPR, type, step, astep);
1577 else
1578 step = astep;
1582 if (zero_p (step))
1583 goto fail;
1584 base = tree_mem_ref_addr (type, base);
1586 else
1588 ifs_ivopts_data.ivopts_data = data;
1589 ifs_ivopts_data.stmt = stmt;
1590 ifs_ivopts_data.step_p = &step;
1591 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1592 || zero_p (step))
1593 goto fail;
1595 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1596 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1598 base = build_fold_addr_expr (base);
1601 civ = alloc_iv (base, step);
1602 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1603 return;
1605 fail:
1606 for_each_index (op_p, idx_record_use, data);
1609 /* Finds and records invariants used in STMT. */
1611 static void
1612 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1614 ssa_op_iter iter;
1615 use_operand_p use_p;
1616 tree op;
1618 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1620 op = USE_FROM_PTR (use_p);
1621 record_invariant (data, op, false);
1625 /* Finds interesting uses of induction variables in the statement STMT. */
1627 static void
1628 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1630 struct iv *iv;
1631 tree op, lhs, rhs;
1632 ssa_op_iter iter;
1633 use_operand_p use_p;
1635 find_invariants_stmt (data, stmt);
1637 if (TREE_CODE (stmt) == COND_EXPR)
1639 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1640 return;
1643 if (TREE_CODE (stmt) == MODIFY_EXPR)
1645 lhs = TREE_OPERAND (stmt, 0);
1646 rhs = TREE_OPERAND (stmt, 1);
1648 if (TREE_CODE (lhs) == SSA_NAME)
1650 /* If the statement defines an induction variable, the uses are not
1651 interesting by themselves. */
1653 iv = get_iv (data, lhs);
1655 if (iv && !zero_p (iv->step))
1656 return;
1659 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1661 case tcc_comparison:
1662 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1663 return;
1665 case tcc_reference:
1666 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1667 if (REFERENCE_CLASS_P (lhs))
1668 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1669 return;
1671 default: ;
1674 if (REFERENCE_CLASS_P (lhs)
1675 && is_gimple_val (rhs))
1677 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1678 find_interesting_uses_op (data, rhs);
1679 return;
1682 /* TODO -- we should also handle address uses of type
1684 memory = call (whatever);
1688 call (memory). */
1691 if (TREE_CODE (stmt) == PHI_NODE
1692 && bb_for_stmt (stmt) == data->current_loop->header)
1694 lhs = PHI_RESULT (stmt);
1695 iv = get_iv (data, lhs);
1697 if (iv && !zero_p (iv->step))
1698 return;
1701 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1703 op = USE_FROM_PTR (use_p);
1705 if (TREE_CODE (op) != SSA_NAME)
1706 continue;
1708 iv = get_iv (data, op);
1709 if (!iv)
1710 continue;
1712 find_interesting_uses_op (data, op);
1716 /* Finds interesting uses of induction variables outside of loops
1717 on loop exit edge EXIT. */
1719 static void
1720 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1722 tree phi, def;
1724 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1726 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1727 find_interesting_uses_outer (data, def);
1731 /* Finds uses of the induction variables that are interesting. */
1733 static void
1734 find_interesting_uses (struct ivopts_data *data)
1736 basic_block bb;
1737 block_stmt_iterator bsi;
1738 tree phi;
1739 basic_block *body = get_loop_body (data->current_loop);
1740 unsigned i;
1741 struct version_info *info;
1742 edge e;
1744 if (dump_file && (dump_flags & TDF_DETAILS))
1745 fprintf (dump_file, "Uses:\n\n");
1747 for (i = 0; i < data->current_loop->num_nodes; i++)
1749 edge_iterator ei;
1750 bb = body[i];
1752 FOR_EACH_EDGE (e, ei, bb->succs)
1753 if (e->dest != EXIT_BLOCK_PTR
1754 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1755 find_interesting_uses_outside (data, e);
1757 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1758 find_interesting_uses_stmt (data, phi);
1759 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1760 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1763 if (dump_file && (dump_flags & TDF_DETAILS))
1765 bitmap_iterator bi;
1767 fprintf (dump_file, "\n");
1769 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1771 info = ver_info (data, i);
1772 if (info->inv_id)
1774 fprintf (dump_file, " ");
1775 print_generic_expr (dump_file, info->name, TDF_SLIM);
1776 fprintf (dump_file, " is invariant (%d)%s\n",
1777 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1781 fprintf (dump_file, "\n");
1784 free (body);
1787 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1788 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1789 we are at the top-level of the processed address. */
1791 static tree
1792 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1793 unsigned HOST_WIDE_INT *offset)
1795 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1796 enum tree_code code;
1797 tree type, orig_type = TREE_TYPE (expr);
1798 unsigned HOST_WIDE_INT off0, off1, st;
1799 tree orig_expr = expr;
1801 STRIP_NOPS (expr);
1803 type = TREE_TYPE (expr);
1804 code = TREE_CODE (expr);
1805 *offset = 0;
1807 switch (code)
1809 case INTEGER_CST:
1810 if (!cst_and_fits_in_hwi (expr)
1811 || zero_p (expr))
1812 return orig_expr;
1814 *offset = int_cst_value (expr);
1815 return build_int_cst_type (orig_type, 0);
1817 case PLUS_EXPR:
1818 case MINUS_EXPR:
1819 op0 = TREE_OPERAND (expr, 0);
1820 op1 = TREE_OPERAND (expr, 1);
1822 op0 = strip_offset_1 (op0, false, false, &off0);
1823 op1 = strip_offset_1 (op1, false, false, &off1);
1825 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1826 if (op0 == TREE_OPERAND (expr, 0)
1827 && op1 == TREE_OPERAND (expr, 1))
1828 return orig_expr;
1830 if (zero_p (op1))
1831 expr = op0;
1832 else if (zero_p (op0))
1834 if (code == PLUS_EXPR)
1835 expr = op1;
1836 else
1837 expr = fold_build1 (NEGATE_EXPR, type, op1);
1839 else
1840 expr = fold_build2 (code, type, op0, op1);
1842 return fold_convert (orig_type, expr);
1844 case ARRAY_REF:
1845 if (!inside_addr)
1846 return orig_expr;
1848 step = array_ref_element_size (expr);
1849 if (!cst_and_fits_in_hwi (step))
1850 break;
1852 st = int_cst_value (step);
1853 op1 = TREE_OPERAND (expr, 1);
1854 op1 = strip_offset_1 (op1, false, false, &off1);
1855 *offset = off1 * st;
1857 if (top_compref
1858 && zero_p (op1))
1860 /* Strip the component reference completely. */
1861 op0 = TREE_OPERAND (expr, 0);
1862 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1863 *offset += off0;
1864 return op0;
1866 break;
1868 case COMPONENT_REF:
1869 if (!inside_addr)
1870 return orig_expr;
1872 tmp = component_ref_field_offset (expr);
1873 if (top_compref
1874 && cst_and_fits_in_hwi (tmp))
1876 /* Strip the component reference completely. */
1877 op0 = TREE_OPERAND (expr, 0);
1878 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1879 *offset = off0 + int_cst_value (tmp);
1880 return op0;
1882 break;
1884 case ADDR_EXPR:
1885 op0 = TREE_OPERAND (expr, 0);
1886 op0 = strip_offset_1 (op0, true, true, &off0);
1887 *offset += off0;
1889 if (op0 == TREE_OPERAND (expr, 0))
1890 return orig_expr;
1892 expr = build_fold_addr_expr (op0);
1893 return fold_convert (orig_type, expr);
1895 case INDIRECT_REF:
1896 inside_addr = false;
1897 break;
1899 default:
1900 return orig_expr;
1903 /* Default handling of expressions for that we want to recurse into
1904 the first operand. */
1905 op0 = TREE_OPERAND (expr, 0);
1906 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1907 *offset += off0;
1909 if (op0 == TREE_OPERAND (expr, 0)
1910 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1911 return orig_expr;
1913 expr = copy_node (expr);
1914 TREE_OPERAND (expr, 0) = op0;
1915 if (op1)
1916 TREE_OPERAND (expr, 1) = op1;
1918 /* Inside address, we might strip the top level component references,
1919 thus changing type of the expression. Handling of ADDR_EXPR
1920 will fix that. */
1921 expr = fold_convert (orig_type, expr);
1923 return expr;
1926 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1928 static tree
1929 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1931 return strip_offset_1 (expr, false, false, offset);
1934 /* Returns variant of TYPE that can be used as base for different uses.
1935 For integer types, we return unsigned variant of the type, which
1936 avoids problems with overflows. For pointer types, we return void *. */
1938 static tree
1939 generic_type_for (tree type)
1941 if (POINTER_TYPE_P (type))
1942 return ptr_type_node;
1944 if (TYPE_UNSIGNED (type))
1945 return type;
1947 return unsigned_type_for (type);
1950 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1951 the bitmap to that we should store it. */
1953 static struct ivopts_data *fd_ivopts_data;
1954 static tree
1955 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1957 bitmap *depends_on = data;
1958 struct version_info *info;
1960 if (TREE_CODE (*expr_p) != SSA_NAME)
1961 return NULL_TREE;
1962 info = name_info (fd_ivopts_data, *expr_p);
1964 if (!info->inv_id || info->has_nonlin_use)
1965 return NULL_TREE;
1967 if (!*depends_on)
1968 *depends_on = BITMAP_ALLOC (NULL);
1969 bitmap_set_bit (*depends_on, info->inv_id);
1971 return NULL_TREE;
1974 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1975 position to POS. If USE is not NULL, the candidate is set as related to
1976 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1977 replacement of the final value of the iv by a direct computation. */
1979 static struct iv_cand *
1980 add_candidate_1 (struct ivopts_data *data,
1981 tree base, tree step, bool important, enum iv_position pos,
1982 struct iv_use *use, tree incremented_at)
1984 unsigned i;
1985 struct iv_cand *cand = NULL;
1986 tree type, orig_type;
1988 if (base)
1990 orig_type = TREE_TYPE (base);
1991 type = generic_type_for (orig_type);
1992 if (type != orig_type)
1994 base = fold_convert (type, base);
1995 if (step)
1996 step = fold_convert (type, step);
2000 for (i = 0; i < n_iv_cands (data); i++)
2002 cand = iv_cand (data, i);
2004 if (cand->pos != pos)
2005 continue;
2007 if (cand->incremented_at != incremented_at)
2008 continue;
2010 if (!cand->iv)
2012 if (!base && !step)
2013 break;
2015 continue;
2018 if (!base && !step)
2019 continue;
2021 if (!operand_equal_p (base, cand->iv->base, 0))
2022 continue;
2024 if (zero_p (cand->iv->step))
2026 if (zero_p (step))
2027 break;
2029 else
2031 if (step && operand_equal_p (step, cand->iv->step, 0))
2032 break;
2036 if (i == n_iv_cands (data))
2038 cand = xcalloc (1, sizeof (struct iv_cand));
2039 cand->id = i;
2041 if (!base && !step)
2042 cand->iv = NULL;
2043 else
2044 cand->iv = alloc_iv (base, step);
2046 cand->pos = pos;
2047 if (pos != IP_ORIGINAL && cand->iv)
2049 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2050 cand->var_after = cand->var_before;
2052 cand->important = important;
2053 cand->incremented_at = incremented_at;
2054 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2056 if (step
2057 && TREE_CODE (step) != INTEGER_CST)
2059 fd_ivopts_data = data;
2060 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2063 if (dump_file && (dump_flags & TDF_DETAILS))
2064 dump_cand (dump_file, cand);
2067 if (important && !cand->important)
2069 cand->important = true;
2070 if (dump_file && (dump_flags & TDF_DETAILS))
2071 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2074 if (use)
2076 bitmap_set_bit (use->related_cands, i);
2077 if (dump_file && (dump_flags & TDF_DETAILS))
2078 fprintf (dump_file, "Candidate %d is related to use %d\n",
2079 cand->id, use->id);
2082 return cand;
2085 /* Returns true if incrementing the induction variable at the end of the LOOP
2086 is allowed.
2088 The purpose is to avoid splitting latch edge with a biv increment, thus
2089 creating a jump, possibly confusing other optimization passes and leaving
2090 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2091 is not available (so we do not have a better alternative), or if the latch
2092 edge is already nonempty. */
2094 static bool
2095 allow_ip_end_pos_p (struct loop *loop)
2097 if (!ip_normal_pos (loop))
2098 return true;
2100 if (!empty_block_p (ip_end_pos (loop)))
2101 return true;
2103 return false;
2106 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2107 position to POS. If USE is not NULL, the candidate is set as related to
2108 it. The candidate computation is scheduled on all available positions. */
2110 static void
2111 add_candidate (struct ivopts_data *data,
2112 tree base, tree step, bool important, struct iv_use *use)
2114 if (ip_normal_pos (data->current_loop))
2115 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2116 if (ip_end_pos (data->current_loop)
2117 && allow_ip_end_pos_p (data->current_loop))
2118 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2121 /* Add a standard "0 + 1 * iteration" iv candidate for a
2122 type with SIZE bits. */
2124 static void
2125 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2126 unsigned int size)
2128 tree type = lang_hooks.types.type_for_size (size, true);
2129 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2130 true, NULL);
2133 /* Adds standard iv candidates. */
2135 static void
2136 add_standard_iv_candidates (struct ivopts_data *data)
2138 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2140 /* The same for a double-integer type if it is still fast enough. */
2141 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2142 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2146 /* Adds candidates bases on the old induction variable IV. */
2148 static void
2149 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2151 tree phi, def;
2152 struct iv_cand *cand;
2154 add_candidate (data, iv->base, iv->step, true, NULL);
2156 /* The same, but with initial value zero. */
2157 add_candidate (data,
2158 build_int_cst (TREE_TYPE (iv->base), 0),
2159 iv->step, true, NULL);
2161 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2162 if (TREE_CODE (phi) == PHI_NODE)
2164 /* Additionally record the possibility of leaving the original iv
2165 untouched. */
2166 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2167 cand = add_candidate_1 (data,
2168 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2169 SSA_NAME_DEF_STMT (def));
2170 cand->var_before = iv->ssa_name;
2171 cand->var_after = def;
2175 /* Adds candidates based on the old induction variables. */
2177 static void
2178 add_old_ivs_candidates (struct ivopts_data *data)
2180 unsigned i;
2181 struct iv *iv;
2182 bitmap_iterator bi;
2184 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2186 iv = ver_info (data, i)->iv;
2187 if (iv && iv->biv_p && !zero_p (iv->step))
2188 add_old_iv_candidates (data, iv);
2192 /* Adds candidates based on the value of the induction variable IV and USE. */
2194 static void
2195 add_iv_value_candidates (struct ivopts_data *data,
2196 struct iv *iv, struct iv_use *use)
2198 unsigned HOST_WIDE_INT offset;
2199 tree base;
2201 add_candidate (data, iv->base, iv->step, false, use);
2203 /* The same, but with initial value zero. Make such variable important,
2204 since it is generic enough so that possibly many uses may be based
2205 on it. */
2206 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2207 iv->step, true, use);
2209 /* Third, try removing the constant offset. */
2210 base = strip_offset (iv->base, &offset);
2211 if (offset)
2212 add_candidate (data, base, iv->step, false, use);
2215 /* Possibly adds pseudocandidate for replacing the final value of USE by
2216 a direct computation. */
2218 static void
2219 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2221 struct tree_niter_desc *niter;
2223 /* We must know where we exit the loop and how many times does it roll. */
2224 niter = niter_for_single_dom_exit (data);
2225 if (!niter
2226 || !zero_p (niter->may_be_zero))
2227 return;
2229 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2232 /* Adds candidates based on the uses. */
2234 static void
2235 add_derived_ivs_candidates (struct ivopts_data *data)
2237 unsigned i;
2239 for (i = 0; i < n_iv_uses (data); i++)
2241 struct iv_use *use = iv_use (data, i);
2243 if (!use)
2244 continue;
2246 switch (use->type)
2248 case USE_NONLINEAR_EXPR:
2249 case USE_COMPARE:
2250 case USE_ADDRESS:
2251 /* Just add the ivs based on the value of the iv used here. */
2252 add_iv_value_candidates (data, use->iv, use);
2253 break;
2255 case USE_OUTER:
2256 add_iv_value_candidates (data, use->iv, use);
2258 /* Additionally, add the pseudocandidate for the possibility to
2259 replace the final value by a direct computation. */
2260 add_iv_outer_candidates (data, use);
2261 break;
2263 default:
2264 gcc_unreachable ();
2269 /* Record important candidates and add them to related_cands bitmaps
2270 if needed. */
2272 static void
2273 record_important_candidates (struct ivopts_data *data)
2275 unsigned i;
2276 struct iv_use *use;
2278 for (i = 0; i < n_iv_cands (data); i++)
2280 struct iv_cand *cand = iv_cand (data, i);
2282 if (cand->important)
2283 bitmap_set_bit (data->important_candidates, i);
2286 data->consider_all_candidates = (n_iv_cands (data)
2287 <= CONSIDER_ALL_CANDIDATES_BOUND);
2289 if (data->consider_all_candidates)
2291 /* We will not need "related_cands" bitmaps in this case,
2292 so release them to decrease peak memory consumption. */
2293 for (i = 0; i < n_iv_uses (data); i++)
2295 use = iv_use (data, i);
2296 BITMAP_FREE (use->related_cands);
2299 else
2301 /* Add important candidates to the related_cands bitmaps. */
2302 for (i = 0; i < n_iv_uses (data); i++)
2303 bitmap_ior_into (iv_use (data, i)->related_cands,
2304 data->important_candidates);
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
2326 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2327 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2328 we allocate a simple list to every use. */
2330 static void
2331 alloc_use_cost_map (struct ivopts_data *data)
2333 unsigned i, size, s, j;
2335 for (i = 0; i < n_iv_uses (data); i++)
2337 struct iv_use *use = iv_use (data, i);
2338 bitmap_iterator bi;
2340 if (data->consider_all_candidates)
2341 size = n_iv_cands (data);
2342 else
2344 s = 0;
2345 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2347 s++;
2350 /* Round up to the power of two, so that moduling by it is fast. */
2351 for (size = 1; size < s; size <<= 1)
2352 continue;
2355 use->n_map_members = size;
2356 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2360 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2361 on invariants DEPENDS_ON and that the value used in expressing it
2362 is VALUE.*/
2364 static void
2365 set_use_iv_cost (struct ivopts_data *data,
2366 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2367 bitmap depends_on, tree value)
2369 unsigned i, s;
2371 if (cost == INFTY)
2373 BITMAP_FREE (depends_on);
2374 return;
2377 if (data->consider_all_candidates)
2379 use->cost_map[cand->id].cand = cand;
2380 use->cost_map[cand->id].cost = cost;
2381 use->cost_map[cand->id].depends_on = depends_on;
2382 use->cost_map[cand->id].value = value;
2383 return;
2386 /* n_map_members is a power of two, so this computes modulo. */
2387 s = cand->id & (use->n_map_members - 1);
2388 for (i = s; i < use->n_map_members; i++)
2389 if (!use->cost_map[i].cand)
2390 goto found;
2391 for (i = 0; i < s; i++)
2392 if (!use->cost_map[i].cand)
2393 goto found;
2395 gcc_unreachable ();
2397 found:
2398 use->cost_map[i].cand = cand;
2399 use->cost_map[i].cost = cost;
2400 use->cost_map[i].depends_on = depends_on;
2401 use->cost_map[i].value = value;
2404 /* Gets cost of (USE, CANDIDATE) pair. */
2406 static struct cost_pair *
2407 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2408 struct iv_cand *cand)
2410 unsigned i, s;
2411 struct cost_pair *ret;
2413 if (!cand)
2414 return NULL;
2416 if (data->consider_all_candidates)
2418 ret = use->cost_map + cand->id;
2419 if (!ret->cand)
2420 return NULL;
2422 return ret;
2425 /* n_map_members is a power of two, so this computes modulo. */
2426 s = cand->id & (use->n_map_members - 1);
2427 for (i = s; i < use->n_map_members; i++)
2428 if (use->cost_map[i].cand == cand)
2429 return use->cost_map + i;
2431 for (i = 0; i < s; i++)
2432 if (use->cost_map[i].cand == cand)
2433 return use->cost_map + i;
2435 return NULL;
2438 /* Returns estimate on cost of computing SEQ. */
2440 static unsigned
2441 seq_cost (rtx seq)
2443 unsigned cost = 0;
2444 rtx set;
2446 for (; seq; seq = NEXT_INSN (seq))
2448 set = single_set (seq);
2449 if (set)
2450 cost += rtx_cost (set, SET);
2451 else
2452 cost++;
2455 return cost;
2458 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2459 static rtx
2460 produce_memory_decl_rtl (tree obj, int *regno)
2462 rtx x;
2464 gcc_assert (obj);
2465 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2467 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2468 x = gen_rtx_SYMBOL_REF (Pmode, name);
2470 else
2471 x = gen_raw_REG (Pmode, (*regno)++);
2473 return gen_rtx_MEM (DECL_MODE (obj), x);
2476 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2477 walk_tree. DATA contains the actual fake register number. */
2479 static tree
2480 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2482 tree obj = NULL_TREE;
2483 rtx x = NULL_RTX;
2484 int *regno = data;
2486 switch (TREE_CODE (*expr_p))
2488 case ADDR_EXPR:
2489 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2490 handled_component_p (*expr_p);
2491 expr_p = &TREE_OPERAND (*expr_p, 0))
2492 continue;
2493 obj = *expr_p;
2494 if (DECL_P (obj))
2495 x = produce_memory_decl_rtl (obj, regno);
2496 break;
2498 case SSA_NAME:
2499 *ws = 0;
2500 obj = SSA_NAME_VAR (*expr_p);
2501 if (!DECL_RTL_SET_P (obj))
2502 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2503 break;
2505 case VAR_DECL:
2506 case PARM_DECL:
2507 case RESULT_DECL:
2508 *ws = 0;
2509 obj = *expr_p;
2511 if (DECL_RTL_SET_P (obj))
2512 break;
2514 if (DECL_MODE (obj) == BLKmode)
2515 x = produce_memory_decl_rtl (obj, regno);
2516 else
2517 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2519 break;
2521 default:
2522 break;
2525 if (x)
2527 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2528 SET_DECL_RTL (obj, x);
2531 return NULL_TREE;
2534 /* Determines cost of the computation of EXPR. */
2536 static unsigned
2537 computation_cost (tree expr)
2539 rtx seq, rslt;
2540 tree type = TREE_TYPE (expr);
2541 unsigned cost;
2542 /* Avoid using hard regs in ways which may be unsupported. */
2543 int regno = LAST_VIRTUAL_REGISTER + 1;
2545 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2546 start_sequence ();
2547 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2548 seq = get_insns ();
2549 end_sequence ();
2551 cost = seq_cost (seq);
2552 if (MEM_P (rslt))
2553 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2555 return cost;
2558 /* Returns variable containing the value of candidate CAND at statement AT. */
2560 static tree
2561 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2563 if (stmt_after_increment (loop, cand, stmt))
2564 return cand->var_after;
2565 else
2566 return cand->var_before;
2569 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2570 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2573 tree_int_cst_sign_bit (tree t)
2575 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2576 unsigned HOST_WIDE_INT w;
2578 if (bitno < HOST_BITS_PER_WIDE_INT)
2579 w = TREE_INT_CST_LOW (t);
2580 else
2582 w = TREE_INT_CST_HIGH (t);
2583 bitno -= HOST_BITS_PER_WIDE_INT;
2586 return (w >> bitno) & 1;
2589 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2590 return cst. Otherwise return NULL_TREE. */
2592 static tree
2593 constant_multiple_of (tree type, tree top, tree bot)
2595 tree res, mby, p0, p1;
2596 enum tree_code code;
2597 bool negate;
2599 STRIP_NOPS (top);
2600 STRIP_NOPS (bot);
/* Identical operands: the ratio is exactly 1.  */
2602 if (operand_equal_p (top, bot, 0))
2603 return build_int_cst (type, 1);
2605 code = TREE_CODE (top);
2606 switch (code)
/* TOP = x * mby: TOP is a constant multiple of BOT iff x is, with the
   ratio scaled by the constant factor mby.  */
2608 case MULT_EXPR:
2609 mby = TREE_OPERAND (top, 1);
2610 if (TREE_CODE (mby) != INTEGER_CST)
2611 return NULL_TREE;
2613 res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2614 if (!res)
2615 return NULL_TREE;
2617 return fold_binary_to_constant (MULT_EXPR, type, res,
2618 fold_convert (type, mby));
/* TOP = a +- b: both operands must be constant multiples of BOT; the
   ratios then combine with the same operation.  */
2620 case PLUS_EXPR:
2621 case MINUS_EXPR:
2622 p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2623 if (!p0)
2624 return NULL_TREE;
2625 p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2626 if (!p1)
2627 return NULL_TREE;
2629 return fold_binary_to_constant (code, type, p0, p1);
/* Both TOP and BOT constant: the ratio exists iff BOT divides TOP
   exactly in TYPE.  */
2631 case INTEGER_CST:
2632 if (TREE_CODE (bot) != INTEGER_CST)
2633 return NULL_TREE;
2635 bot = fold_convert (type, bot);
2636 top = fold_convert (type, top);
2638 /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2639 the result afterwards. */
2640 if (tree_int_cst_sign_bit (bot))
2642 negate = true;
2643 bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2645 else
2646 negate = false;
2648 /* Ditto for TOP. */
2649 if (tree_int_cst_sign_bit (top))
2651 negate = !negate;
2652 top = fold_unary_to_constant (NEGATE_EXPR, type, top);
/* With both operands made non-negative, check divisibility, then use the
   exact division.  */
2655 if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2656 return NULL_TREE;
2658 res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2659 if (negate)
2660 res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2661 return res;
/* Any other expression shape is not recognized.  */
2663 default:
2664 return NULL_TREE;
2668 /* Sets COMB to CST. */
2670 static void
2671 aff_combination_const (struct affine_tree_combination *comb, tree type,
2672 unsigned HOST_WIDE_INT cst)
2674 unsigned prec = TYPE_PRECISION (type);
2676 comb->type = type;
2677 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2679 comb->n = 0;
2680 comb->rest = NULL_TREE;
2681 comb->offset = cst & comb->mask;
2684 /* Sets COMB to single element ELT. */
2686 static void
2687 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2689 unsigned prec = TYPE_PRECISION (type);
2691 comb->type = type;
2692 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2694 comb->n = 1;
2695 comb->elts[0] = elt;
2696 comb->coefs[0] = 1;
2697 comb->rest = NULL_TREE;
2698 comb->offset = 0;
2701 /* Scales COMB by SCALE. */
2703 static void
2704 aff_combination_scale (struct affine_tree_combination *comb,
2705 unsigned HOST_WIDE_INT scale)
2707 unsigned i, j;
/* Multiplying by 1 changes nothing.  */
2709 if (scale == 1)
2710 return;
/* Multiplying by 0 collapses the whole combination to the constant 0.  */
2712 if (scale == 0)
2714 aff_combination_const (comb, comb->type, 0);
2715 return;
/* Scale the constant part and every coefficient, compacting away elements
   whose coefficient becomes zero modulo the precision mask.  */
2718 comb->offset = (scale * comb->offset) & comb->mask;
2719 for (i = 0, j = 0; i < comb->n; i++)
2721 comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2722 comb->elts[j] = comb->elts[i];
2723 if (comb->coefs[j] != 0)
2724 j++;
2726 comb->n = j;
/* REST carries an implicit coefficient of 1, so after scaling it needs
   coefficient SCALE.  Promote it to a regular element if compaction freed
   a slot; otherwise fold the multiplication into the tree itself.  */
2728 if (comb->rest)
2730 if (comb->n < MAX_AFF_ELTS)
2732 comb->coefs[comb->n] = scale;
2733 comb->elts[comb->n] = comb->rest;
2734 comb->rest = NULL_TREE;
2735 comb->n++;
2737 else
2738 comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2739 build_int_cst_type (comb->type, scale));
2743 /* Adds ELT * SCALE to COMB. */
2745 static void
2746 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2747 unsigned HOST_WIDE_INT scale)
2749 unsigned i;
2751 if (scale == 0)
2752 return;
/* If ELT already occurs in COMB, just merge the coefficients.  */
2754 for (i = 0; i < comb->n; i++)
2755 if (operand_equal_p (comb->elts[i], elt, 0))
2757 comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2758 if (comb->coefs[i])
2759 return;
/* The coefficients cancelled; delete the element by moving the last
   one into its slot.  */
2761 comb->n--;
2762 comb->coefs[i] = comb->coefs[comb->n];
2763 comb->elts[i] = comb->elts[comb->n];
/* A slot just became free, so REST (implicit coefficient 1) can become
   a regular element again.  */
2765 if (comb->rest)
2767 gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2768 comb->coefs[comb->n] = 1;
2769 comb->elts[comb->n] = comb->rest;
2770 comb->rest = NULL_TREE;
2771 comb->n++;
2773 return;
/* ELT is new; append it if there is room.  */
2775 if (comb->n < MAX_AFF_ELTS)
2777 comb->coefs[comb->n] = scale;
2778 comb->elts[comb->n] = elt;
2779 comb->n++;
2780 return;
/* No slot left: fold SCALE * ELT into the catch-all REST tree.  */
2783 if (scale == 1)
2784 elt = fold_convert (comb->type, elt);
2785 else
2786 elt = fold_build2 (MULT_EXPR, comb->type,
2787 fold_convert (comb->type, elt),
2788 build_int_cst_type (comb->type, scale));
2790 if (comb->rest)
2791 comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2792 else
2793 comb->rest = elt;
2796 /* Adds COMB2 to COMB1. */
2798 static void
2799 aff_combination_add (struct affine_tree_combination *comb1,
2800 struct affine_tree_combination *comb2)
2802 unsigned i;
2804 comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2805 for (i = 0; i < comb2->n; i++)
2806 aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2807 if (comb2->rest)
2808 aff_combination_add_elt (comb1, comb2->rest, 1);
2811 /* Splits EXPR into an affine combination of parts. */
2813 static void
2814 tree_to_aff_combination (tree expr, tree type,
2815 struct affine_tree_combination *comb)
2817 struct affine_tree_combination tmp;
2818 enum tree_code code;
2819 tree cst, core, toffset;
2820 HOST_WIDE_INT bitpos, bitsize;
2821 enum machine_mode mode;
2822 int unsignedp, volatilep;
2824 STRIP_NOPS (expr);
2826 code = TREE_CODE (expr);
2827 switch (code)
/* A constant becomes just the offset part.  */
2829 case INTEGER_CST:
2830 aff_combination_const (comb, type, int_cst_value (expr));
2831 return;
/* a +- b: decompose both sides and add them, negating the second
   operand for subtraction.  */
2833 case PLUS_EXPR:
2834 case MINUS_EXPR:
2835 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2836 tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2837 if (code == MINUS_EXPR)
2838 aff_combination_scale (&tmp, -1);
2839 aff_combination_add (comb, &tmp);
2840 return;
/* a * cst: decompose a and scale by the constant.  A non-constant
   multiplier falls through to the opaque-element case.  */
2842 case MULT_EXPR:
2843 cst = TREE_OPERAND (expr, 1);
2844 if (TREE_CODE (cst) != INTEGER_CST)
2845 break;
2846 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2847 aff_combination_scale (comb, int_cst_value (cst));
2848 return;
2850 case NEGATE_EXPR:
2851 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2852 aff_combination_scale (comb, -1);
2853 return;
/* &ref: split the reference into a base object plus a byte offset, and
   decompose each part.  Bit-level offsets cannot be represented, so
   such addresses stay opaque.  */
2855 case ADDR_EXPR:
2856 core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2857 &toffset, &mode, &unsignedp, &volatilep,
2858 false);
2859 if (bitpos % BITS_PER_UNIT != 0)
2860 break;
2861 aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2862 core = build_fold_addr_expr (core);
2863 if (TREE_CODE (core) == ADDR_EXPR)
2864 aff_combination_add_elt (comb, core, 1);
2865 else
2867 tree_to_aff_combination (core, type, &tmp);
2868 aff_combination_add (comb, &tmp);
2870 if (toffset)
2872 tree_to_aff_combination (toffset, type, &tmp);
2873 aff_combination_add (comb, &tmp);
2875 return;
2877 default:
2878 break;
/* Anything we cannot decompose becomes a single opaque element with
   coefficient 1.  */
2881 aff_combination_elt (comb, type, expr);
2884 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for width of TYPE. */
2886 static tree
2887 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2888 unsigned HOST_WIDE_INT mask)
2890 enum tree_code code;
2892 scale &= mask;
2893 elt = fold_convert (type, elt);
/* SCALE == 1: just return or add ELT itself.  */
2895 if (scale == 1)
2897 if (!expr)
2898 return elt;
2900 return fold_build2 (PLUS_EXPR, type, expr, elt);
/* SCALE == MASK is -1 in the masked (two's complement) arithmetic:
   negate or subtract ELT.  */
2903 if (scale == mask)
2905 if (!expr)
2906 return fold_build1 (NEGATE_EXPR, type, elt);
2908 return fold_build2 (MINUS_EXPR, type, expr, elt);
/* No accumulated expression yet: the result is just the product.  */
2911 if (!expr)
2912 return fold_build2 (MULT_EXPR, type, elt,
2913 build_int_cst_type (type, scale));
/* A set top bit means SCALE is negative; emit EXPR - (-SCALE) * ELT so
   the multiplier that appears in the tree stays positive.  */
2915 if ((scale | (mask >> 1)) == mask)
2917 /* Scale is negative. */
2918 code = MINUS_EXPR;
2919 scale = (-scale) & mask;
2921 else
2922 code = PLUS_EXPR;
2924 elt = fold_build2 (MULT_EXPR, type, elt,
2925 build_int_cst_type (type, scale));
2926 return fold_build2 (code, type, expr, elt);
2929 /* Copies the tree elements of COMB to ensure that they are not shared. */
2931 static void
2932 unshare_aff_combination (struct affine_tree_combination *comb)
2934 unsigned i;
2936 for (i = 0; i < comb->n; i++)
2937 comb->elts[i] = unshare_expr (comb->elts[i]);
2938 if (comb->rest)
2939 comb->rest = unshare_expr (comb->rest);
2942 /* Makes tree from the affine combination COMB. */
2944 static tree
2945 aff_combination_to_tree (struct affine_tree_combination *comb)
2947 tree type = comb->type;
2948 tree expr = comb->rest;
2949 unsigned i;
2950 unsigned HOST_WIDE_INT off, sgn;
2952 /* Handle the special case produced by get_computation_aff when
2953 the type does not fit in HOST_WIDE_INT. */
2954 if (comb->n == 0 && comb->offset == 0)
2955 return fold_convert (type, expr);
/* REST may only be present when all element slots are occupied.  */
2957 gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
/* Accumulate the elements, then the constant offset last.  */
2959 for (i = 0; i < comb->n; i++)
2960 expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2961 comb->mask);
/* An offset with the top bit set under the mask represents a negative
   value; add its absolute value with sign -1 (== MASK) instead.  */
2963 if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2965 /* Offset is negative. */
2966 off = (-comb->offset) & comb->mask;
2967 sgn = comb->mask;
2969 else
2971 off = comb->offset;
2972 sgn = 1;
2974 return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2975 comb->mask);
2978 /* Determines the expression by that USE is expressed from induction variable
2979 CAND at statement AT in LOOP. The expression is stored in a decomposed
2980 form into AFF. Returns false if USE cannot be expressed using CAND. */
2982 static bool
2983 get_computation_aff (struct loop *loop,
2984 struct iv_use *use, struct iv_cand *cand, tree at,
2985 struct affine_tree_combination *aff)
2987 tree ubase = use->iv->base;
2988 tree ustep = use->iv->step;
2989 tree cbase = cand->iv->base;
2990 tree cstep = cand->iv->step;
2991 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2992 tree uutype;
2993 tree expr, delta;
2994 tree ratio;
2995 unsigned HOST_WIDE_INT ustepi, cstepi;
2996 HOST_WIDE_INT ratioi;
2997 struct affine_tree_combination cbase_aff, expr_aff;
2998 tree cstep_orig = cstep, ustep_orig = ustep;
3000 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3002 /* We do not have a precision to express the values of use. */
3003 return false;
3006 expr = var_at_stmt (loop, cand, at);
3008 if (TREE_TYPE (expr) != ctype)
3010 /* This may happen with the original ivs. */
3011 expr = fold_convert (ctype, expr);
/* Compute in UUTYPE, an unsigned variant of the use type, so the
   arithmetic below has modulo semantics.  */
3014 if (TYPE_UNSIGNED (utype))
3015 uutype = utype;
3016 else
3018 uutype = unsigned_type_for (utype);
3019 ubase = fold_convert (uutype, ubase);
3020 ustep = fold_convert (uutype, ustep);
3023 if (uutype != ctype)
3025 expr = fold_convert (uutype, expr);
3026 cbase = fold_convert (uutype, cbase);
3027 cstep = fold_convert (uutype, cstep);
3029 /* If the conversion is not noop, we must take it into account when
3030 considering the value of the step. */
3031 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3032 cstep_orig = cstep;
/* RATIO = USTEP / CSTEP; the fast path works when both steps fit in a
   HOST_WIDE_INT, otherwise fall back to symbolic division.  */
3035 if (cst_and_fits_in_hwi (cstep_orig)
3036 && cst_and_fits_in_hwi (ustep_orig))
3038 ustepi = int_cst_value (ustep_orig);
3039 cstepi = int_cst_value (cstep_orig);
3041 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3043 /* TODO maybe consider case when ustep divides cstep and the ratio is
3044 a power of 2 (so that the division is fast to execute)? We would
3045 need to be much more careful with overflows etc. then. */
3046 return false;
3049 ratio = build_int_cst_type (uutype, ratioi);
3051 else
3053 ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
3054 if (!ratio)
3055 return false;
3057 /* Ratioi is only used to detect special cases when the multiplicative
3058 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3059 we may set it to 0. We prefer cst_and_fits_in_hwi/int_cst_value
3060 to integer_onep/integer_all_onesp, since the former ignores
3061 TREE_OVERFLOW. */
3062 if (cst_and_fits_in_hwi (ratio))
3063 ratioi = int_cst_value (ratio);
3064 else if (integer_onep (ratio))
3065 ratioi = 1;
3066 else if (integer_all_onesp (ratio))
3067 ratioi = -1;
3068 else
3069 ratioi = 0;
3072 /* We may need to shift the value if we are after the increment. */
3073 if (stmt_after_increment (loop, cand, at))
3074 cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);
3076 /* use = ubase - ratio * cbase + ratio * var.
3078 In general case ubase + ratio * (var - cbase) could be better (one less
3079 multiplication), but often it is possible to eliminate redundant parts
3080 of computations from (ubase - ratio * cbase) term, and if it does not
3081 happen, fold is able to apply the distributive law to obtain this form
3082 anyway. */
3084 if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
3086 /* Let's compute in trees and just return the result in AFF. This case
3087 should not be very common, and fold itself is not that bad either,
3088 so making the aff. functions more complicated to handle this case
3089 is not that urgent. */
3090 if (ratioi == 1)
3092 delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
3093 expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3095 else if (ratioi == -1)
3097 delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
3098 expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3100 else
3102 delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
3103 delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
3104 expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3105 expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
/* Return the whole folded tree in the REST slot; note the zero MASK,
   the special shape aff_combination_to_tree checks for.  */
3108 aff->type = uutype;
3109 aff->n = 0;
3110 aff->offset = 0;
3111 aff->mask = 0;
3112 aff->rest = expr;
3113 return true;
3116 /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3117 possible to compute ratioi. */
3118 gcc_assert (ratioi);
/* AFF = ubase - ratioi * cbase + ratioi * var, computed in the
   decomposed affine form.  */
3120 tree_to_aff_combination (ubase, uutype, aff);
3121 tree_to_aff_combination (cbase, uutype, &cbase_aff);
3122 tree_to_aff_combination (expr, uutype, &expr_aff);
3123 aff_combination_scale (&cbase_aff, -ratioi);
3124 aff_combination_scale (&expr_aff, ratioi);
3125 aff_combination_add (aff, &cbase_aff);
3126 aff_combination_add (aff, &expr_aff);
3128 return true;
3131 /* Determines the expression by that USE is expressed from induction variable
3132 CAND at statement AT in LOOP. The computation is unshared. */
3134 static tree
3135 get_computation_at (struct loop *loop,
3136 struct iv_use *use, struct iv_cand *cand, tree at)
3138 struct affine_tree_combination aff;
3139 tree type = TREE_TYPE (use->iv->base);
3141 if (!get_computation_aff (loop, use, cand, at, &aff))
3142 return NULL_TREE;
3143 unshare_aff_combination (&aff);
3144 return fold_convert (type, aff_combination_to_tree (&aff));
3147 /* Determines the expression by that USE is expressed from induction variable
3148 CAND in LOOP. The computation is unshared. */
3150 static tree
3151 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3153 return get_computation_at (loop, use, cand, use->stmt);
3156 /* Returns cost of addition in MODE. */
3158 static unsigned
3159 add_cost (enum machine_mode mode)
3161 static unsigned costs[NUM_MACHINE_MODES];
3162 rtx seq;
3163 unsigned cost;
3165 if (costs[mode])
3166 return costs[mode];
3168 start_sequence ();
3169 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3170 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3171 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3172 NULL_RTX);
3173 seq = get_insns ();
3174 end_sequence ();
3176 cost = seq_cost (seq);
3177 if (!cost)
3178 cost = 1;
3180 costs[mode] = cost;
3182 if (dump_file && (dump_flags & TDF_DETAILS))
3183 fprintf (dump_file, "Addition in %s costs %d\n",
3184 GET_MODE_NAME (mode), cost);
3185 return cost;
3188 /* Entry in a hashtable of already known costs for multiplication. */
/* Keyed by (cst, mode); entries are created lazily by multiply_by_cost.  */
3189 struct mbc_entry
3191 HOST_WIDE_INT cst; /* The constant to multiply by. */
3192 enum machine_mode mode; /* In mode. */
3193 unsigned cost; /* The cost. */
3196 /* Counts hash value for the ENTRY. */
3198 static hashval_t
3199 mbc_entry_hash (const void *entry)
3201 const struct mbc_entry *e = entry;
3203 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3206 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3208 static int
3209 mbc_entry_eq (const void *entry1, const void *entry2)
3211 const struct mbc_entry *e1 = entry1;
3212 const struct mbc_entry *e2 = entry2;
3214 return (e1->mode == e2->mode
3215 && e1->cst == e2->cst);
3218 /* Returns cost of multiplication by constant CST in MODE. */
3220 unsigned
3221 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3223 static htab_t costs;
3224 struct mbc_entry **cached, act;
3225 rtx seq;
3226 unsigned cost;
3228 if (!costs)
3229 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3231 act.mode = mode;
3232 act.cst = cst;
3233 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3234 if (*cached)
3235 return (*cached)->cost;
3237 *cached = xmalloc (sizeof (struct mbc_entry));
3238 (*cached)->mode = mode;
3239 (*cached)->cst = cst;
3241 start_sequence ();
3242 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3243 gen_int_mode (cst, mode), NULL_RTX, 0);
3244 seq = get_insns ();
3245 end_sequence ();
3247 cost = seq_cost (seq);
3249 if (dump_file && (dump_flags & TDF_DETAILS))
3250 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3251 (int) cst, GET_MODE_NAME (mode), cost);
3253 (*cached)->cost = cost;
3255 return cost;
3258 /* Returns true if multiplying by RATIO is allowed in address. */
3260 bool
3261 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3263 #define MAX_RATIO 128
3264 static sbitmap valid_mult;
/* On the first call, probe every ratio in [-MAX_RATIO, MAX_RATIO] once and
   cache which of them the target accepts inside a MULT address.  */
3266 if (!valid_mult)
3268 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3269 rtx addr;
3270 HOST_WIDE_INT i;
3272 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3273 sbitmap_zero (valid_mult);
3274 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3275 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3277 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3278 if (memory_address_p (Pmode, addr))
3279 SET_BIT (valid_mult, i + MAX_RATIO)
3282 if (dump_file && (dump_flags & TDF_DETAILS))
3284 fprintf (dump_file, " allowed multipliers:");
3285 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3286 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3287 fprintf (dump_file, " %d", (int) i);
3288 fprintf (dump_file, "\n");
3289 fprintf (dump_file, "\n");
/* Ratios outside the probed range are conservatively rejected.  */
3293 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3294 return false;
3296 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3299 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3300 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3301 variable is omitted. The created memory accesses MODE.
3303 TODO -- there must be some better way. This all is quite crude. */
3305 static unsigned
3306 get_address_cost (bool symbol_present, bool var_present,
3307 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3309 static bool initialized = false;
3310 static HOST_WIDE_INT rat, off;
3311 static HOST_WIDE_INT min_offset, max_offset;
3312 static unsigned costs[2][2][2][2];
3313 unsigned cost, acost;
3314 rtx seq, addr, base;
3315 bool offset_p, ratio_p;
3316 rtx reg1;
3317 HOST_WIDE_INT s_offset;
3318 unsigned HOST_WIDE_INT mask;
3319 unsigned bits;
/* One-time target probing: find the range of constant offsets and a
   representative multiplier the target accepts in addresses.  */
3321 if (!initialized)
3323 HOST_WIDE_INT i;
3324 initialized = true;
3326 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3328 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
/* Probe powers of two up to 2^20 to find the largest valid positive
   offset, then the largest valid negative one.  */
3329 for (i = 1; i <= 1 << 20; i <<= 1)
3331 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3332 if (!memory_address_p (Pmode, addr))
3333 break;
3335 max_offset = i >> 1;
3336 off = max_offset;
3338 for (i = 1; i <= 1 << 20; i <<= 1)
3340 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3341 if (!memory_address_p (Pmode, addr))
3342 break;
3344 min_offset = -(i >> 1);
3346 if (dump_file && (dump_flags & TDF_DETAILS))
3348 fprintf (dump_file, "get_address_cost:\n");
3349 fprintf (dump_file, " min offset %d\n", (int) min_offset);
3350 fprintf (dump_file, " max offset %d\n", (int) max_offset);
/* Remember the smallest multiplier > 1 the target allows.  */
3353 rat = 1;
3354 for (i = 2; i <= MAX_RATIO; i++)
3355 if (multiplier_allowed_in_address_p (i))
3357 rat = i;
3358 break;
/* Sign-extend OFFSET from the width of Pmode into s_offset.  */
3362 bits = GET_MODE_BITSIZE (Pmode);
3363 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3364 offset &= mask;
3365 if ((offset >> (bits - 1) & 1))
3366 offset |= ~mask;
3367 s_offset = offset;
3369 cost = 0;
3370 offset_p = (s_offset != 0
3371 && min_offset <= s_offset && s_offset <= max_offset);
3372 ratio_p = (ratio != 1
3373 && multiplier_allowed_in_address_p (ratio));
/* Parts the addressing mode cannot express must be computed explicitly
   before the access; charge for them here.  */
3375 if (ratio != 1 && !ratio_p)
3376 cost += multiply_by_cost (ratio, Pmode);
3378 if (s_offset && !offset_p && !symbol_present)
3380 cost += add_cost (Pmode);
3381 var_present = true;
/* Measure and cache the cost of each of the 16 representable address
   shapes lazily; 0 in the cache means "not measured yet".  */
3384 acost = costs[symbol_present][var_present][offset_p][ratio_p];
3385 if (!acost)
3387 int old_cse_not_expected;
3388 acost = 0;
3390 addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3391 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3392 if (ratio_p)
3393 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3395 if (var_present)
3396 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3398 if (symbol_present)
3400 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3401 if (offset_p)
3402 base = gen_rtx_fmt_e (CONST, Pmode,
3403 gen_rtx_fmt_ee (PLUS, Pmode,
3404 base,
3405 gen_int_mode (off, Pmode)));
3407 else if (offset_p)
3408 base = gen_int_mode (off, Pmode);
3409 else
3410 base = NULL_RTX;
3412 if (base)
3413 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3415 start_sequence ();
3416 /* To avoid splitting addressing modes, pretend that no cse will
3417 follow. */
3418 old_cse_not_expected = cse_not_expected;
3419 cse_not_expected = true;
3420 addr = memory_address (Pmode, addr);
3421 cse_not_expected = old_cse_not_expected;
3422 seq = get_insns ();
3423 end_sequence ();
3425 acost = seq_cost (seq);
3426 acost += address_cost (addr, Pmode);
3428 if (!acost)
3429 acost = 1;
3430 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3433 return cost + acost;
3436 /* Estimates cost of forcing expression EXPR into a variable. */
3438 unsigned
3439 force_expr_to_var_cost (tree expr)
3441 static bool costs_initialized = false;
3442 static unsigned integer_cost;
3443 static unsigned symbol_cost;
3444 static unsigned address_cost;
3445 tree op0, op1;
3446 unsigned cost0, cost1, cost;
3447 enum machine_mode mode;
/* On the first call, measure the base costs of materializing an integer
   constant, a symbol address, and symbol + offset on this target.  */
3449 if (!costs_initialized)
3451 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3452 rtx x = gen_rtx_MEM (DECL_MODE (var),
3453 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3454 tree addr;
3455 tree type = build_pointer_type (integer_type_node);
3457 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
3458 2000));
3460 SET_DECL_RTL (var, x);
3461 TREE_STATIC (var) = 1;
3462 addr = build1 (ADDR_EXPR, type, var);
3463 symbol_cost = computation_cost (addr) + 1;
3465 address_cost
3466 = computation_cost (build2 (PLUS_EXPR, type,
3467 addr,
3468 build_int_cst_type (type, 2000))) + 1;
3469 if (dump_file && (dump_flags & TDF_DETAILS))
3471 fprintf (dump_file, "force_expr_to_var_cost:\n");
3472 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3473 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3474 fprintf (dump_file, " address %d\n", (int) address_cost);
3475 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3476 fprintf (dump_file, "\n");
3479 costs_initialized = true;
3482 STRIP_NOPS (expr);
/* A bare SSA name or variable costs nothing.  */
3484 if (SSA_VAR_P (expr))
3485 return 0;
/* Invariant constants and addresses get the pre-measured base costs.  */
3487 if (TREE_INVARIANT (expr))
3489 if (TREE_CODE (expr) == INTEGER_CST)
3490 return integer_cost;
3492 if (TREE_CODE (expr) == ADDR_EXPR)
3494 tree obj = TREE_OPERAND (expr, 0);
3496 if (TREE_CODE (obj) == VAR_DECL
3497 || TREE_CODE (obj) == PARM_DECL
3498 || TREE_CODE (obj) == RESULT_DECL)
3499 return symbol_cost;
3502 return address_cost;
/* First compute the costs of the operands recursively ...  */
3505 switch (TREE_CODE (expr))
3507 case PLUS_EXPR:
3508 case MINUS_EXPR:
3509 case MULT_EXPR:
3510 op0 = TREE_OPERAND (expr, 0);
3511 op1 = TREE_OPERAND (expr, 1);
3512 STRIP_NOPS (op0);
3513 STRIP_NOPS (op1);
3515 if (is_gimple_val (op0))
3516 cost0 = 0;
3517 else
3518 cost0 = force_expr_to_var_cost (op0);
3520 if (is_gimple_val (op1))
3521 cost1 = 0;
3522 else
3523 cost1 = force_expr_to_var_cost (op1);
3525 break;
3527 default:
3528 /* Just an arbitrary value, FIXME. */
3529 return target_spill_cost;
/* ... then add the cost of the operation itself.  */
3532 mode = TYPE_MODE (TREE_TYPE (expr));
3533 switch (TREE_CODE (expr))
3535 case PLUS_EXPR:
3536 case MINUS_EXPR:
3537 cost = add_cost (mode);
3538 break;
3540 case MULT_EXPR:
3541 if (cst_and_fits_in_hwi (op0))
3542 cost = multiply_by_cost (int_cst_value (op0), mode);
3543 else if (cst_and_fits_in_hwi (op1))
3544 cost = multiply_by_cost (int_cst_value (op1), mode);
3545 else
3546 return target_spill_cost;
3547 break;
3549 default:
3550 gcc_unreachable ();
3553 cost += cost0;
3554 cost += cost1;
3556 /* Bound the cost by target_spill_cost. The parts of complicated
3557 computations often are either loop invariant or at least can
3558 be shared between several iv uses, so letting this grow without
3559 limits would not give reasonable results. */
3560 return cost < target_spill_cost ? cost : target_spill_cost;
3563 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3564 invariants the computation depends on. */
3566 static unsigned
3567 force_var_cost (struct ivopts_data *data,
3568 tree expr, bitmap *depends_on)
3570 if (depends_on)
3572 fd_ivopts_data = data;
3573 walk_tree (&expr, find_depends, depends_on, NULL);
3576 return force_expr_to_var_cost (expr);
3579 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3580 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3581 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3582 invariants the computation depends on. */
3584 static unsigned
3585 split_address_cost (struct ivopts_data *data,
3586 tree addr, bool *symbol_present, bool *var_present,
3587 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3589 tree core;
3590 HOST_WIDE_INT bitsize;
3591 HOST_WIDE_INT bitpos;
3592 tree toffset;
3593 enum machine_mode mode;
3594 int unsignedp, volatilep;
3596 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3597 &unsignedp, &volatilep, false);
3599 if (toffset != 0
3600 || bitpos % BITS_PER_UNIT != 0
3601 || TREE_CODE (core) != VAR_DECL)
3603 *symbol_present = false;
3604 *var_present = true;
3605 fd_ivopts_data = data;
3606 walk_tree (&addr, find_depends, depends_on, NULL);
3607 return target_spill_cost;
3610 *offset += bitpos / BITS_PER_UNIT;
3611 if (TREE_STATIC (core)
3612 || DECL_EXTERNAL (core))
3614 *symbol_present = true;
3615 *var_present = false;
3616 return 0;
3619 *symbol_present = false;
3620 *var_present = true;
3621 return 0;
3624 /* Estimates cost of expressing difference of addresses E1 - E2 as
3625 var + symbol + offset. The value of offset is added to OFFSET,
3626 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3627 part is missing. DEPENDS_ON is a set of the invariants the computation
3628 depends on. */
3630 static unsigned
3631 ptr_difference_cost (struct ivopts_data *data,
3632 tree e1, tree e2, bool *symbol_present, bool *var_present,
3633 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3635 HOST_WIDE_INT diff = 0;
3636 unsigned cost;
3638 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3640 if (ptr_difference_const (e1, e2, &diff))
3642 *offset += diff;
3643 *symbol_present = false;
3644 *var_present = false;
3645 return 0;
3648 if (e2 == integer_zero_node)
3649 return split_address_cost (data, TREE_OPERAND (e1, 0),
3650 symbol_present, var_present, offset, depends_on);
3652 *symbol_present = false;
3653 *var_present = true;
3655 cost = force_var_cost (data, e1, depends_on);
3656 cost += force_var_cost (data, e2, depends_on);
3657 cost += add_cost (Pmode);
3659 return cost;
3662 /* Estimates cost of expressing difference E1 - E2 as
3663 var + symbol + offset. The value of offset is added to OFFSET,
3664 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3665 part is missing. DEPENDS_ON is a set of the invariants the computation
3666 depends on. */
3668 static unsigned
3669 difference_cost (struct ivopts_data *data,
3670 tree e1, tree e2, bool *symbol_present, bool *var_present,
3671 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3673 unsigned cost;
3674 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3675 unsigned HOST_WIDE_INT off1, off2;
3677 e1 = strip_offset (e1, &off1);
3678 e2 = strip_offset (e2, &off2);
3679 *offset += off1 - off2;
3681 STRIP_NOPS (e1);
3682 STRIP_NOPS (e2);
3684 if (TREE_CODE (e1) == ADDR_EXPR)
3685 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3686 depends_on);
3687 *symbol_present = false;
3689 if (operand_equal_p (e1, e2, 0))
3691 *var_present = false;
3692 return 0;
3694 *var_present = true;
3695 if (zero_p (e2))
3696 return force_var_cost (data, e1, depends_on);
3698 if (zero_p (e1))
3700 cost = force_var_cost (data, e2, depends_on);
3701 cost += multiply_by_cost (-1, mode);
3703 return cost;
3706 cost = force_var_cost (data, e1, depends_on);
3707 cost += force_var_cost (data, e2, depends_on);
3708 cost += add_cost (mode);
3710 return cost;
3713 /* Determines the cost of the computation by that USE is expressed
3714 from induction variable CAND. If ADDRESS_P is true, we just need
3715 to create an address from it, otherwise we want to get it into
3716 register. A set of invariants we depend on is stored in
3717 DEPENDS_ON. AT is the statement at that the value is computed. */
3719 static unsigned
3720 get_computation_cost_at (struct ivopts_data *data,
3721 struct iv_use *use, struct iv_cand *cand,
3722 bool address_p, bitmap *depends_on, tree at)
3724 tree ubase = use->iv->base, ustep = use->iv->step;
3725 tree cbase, cstep;
3726 tree utype = TREE_TYPE (ubase), ctype;
3727 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3728 HOST_WIDE_INT ratio, aratio;
3729 bool var_present, symbol_present;
3730 unsigned cost = 0, n_sums;
3732 *depends_on = NULL;
3734 /* Only consider real candidates. */
3735 if (!cand->iv)
3736 return INFTY;
3738 cbase = cand->iv->base;
3739 cstep = cand->iv->step;
3740 ctype = TREE_TYPE (cbase);
3742 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3744 /* We do not have a precision to express the values of use. */
3745 return INFTY;
3748 if (address_p)
3750 /* Do not try to express address of an object with computation based
3751 on address of a different object. This may cause problems in rtl
3752 level alias analysis (that does not expect this to be happening,
3753 as this is illegal in C), and would be unlikely to be useful
3754 anyway. */
3755 if (use->iv->base_object
3756 && cand->iv->base_object
3757 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3758 return INFTY;
3761 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3763 /* TODO -- add direct handling of this case. */
3764 goto fallback;
3767 /* CSTEPI is removed from the offset in case statement is after the
3768 increment. If the step is not constant, we use zero instead.
3769 This is a bit imprecise (there is the extra addition), but
3770 redundancy elimination is likely to transform the code so that
3771 it uses value of the variable before increment anyway,
3772 so it is not that much unrealistic. */
3773 if (cst_and_fits_in_hwi (cstep))
3774 cstepi = int_cst_value (cstep);
3775 else
3776 cstepi = 0;
3778 if (cst_and_fits_in_hwi (ustep)
3779 && cst_and_fits_in_hwi (cstep))
3781 ustepi = int_cst_value (ustep);
3783 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3784 return INFTY;
3786 else
3788 tree rat;
3790 rat = constant_multiple_of (utype, ustep, cstep);
3792 if (!rat)
3793 return INFTY;
3795 if (cst_and_fits_in_hwi (rat))
3796 ratio = int_cst_value (rat);
3797 else if (integer_onep (rat))
3798 ratio = 1;
3799 else if (integer_all_onesp (rat))
3800 ratio = -1;
3801 else
3802 return INFTY;
3805 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3806 or ratio == 1, it is better to handle this like
3808 ubase - ratio * cbase + ratio * var
3810 (also holds in the case ratio == -1, TODO. */
3812 if (cst_and_fits_in_hwi (cbase))
3814 offset = - ratio * int_cst_value (cbase);
3815 cost += difference_cost (data,
3816 ubase, integer_zero_node,
3817 &symbol_present, &var_present, &offset,
3818 depends_on);
3820 else if (ratio == 1)
3822 cost += difference_cost (data,
3823 ubase, cbase,
3824 &symbol_present, &var_present, &offset,
3825 depends_on);
3827 else
3829 cost += force_var_cost (data, cbase, depends_on);
3830 cost += add_cost (TYPE_MODE (ctype));
3831 cost += difference_cost (data,
3832 ubase, integer_zero_node,
3833 &symbol_present, &var_present, &offset,
3834 depends_on);
3837 /* If we are after the increment, the value of the candidate is higher by
3838 one iteration. */
3839 if (stmt_after_increment (data->current_loop, cand, at))
3840 offset -= ratio * cstepi;
3842 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3843 (symbol/var/const parts may be omitted). If we are looking for an address,
3844 find the cost of addressing this. */
3845 if (address_p)
3846 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3848 /* Otherwise estimate the costs for computing the expression. */
3849 aratio = ratio > 0 ? ratio : -ratio;
3850 if (!symbol_present && !var_present && !offset)
3852 if (ratio != 1)
3853 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3855 return cost;
3858 if (aratio != 1)
3859 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3861 n_sums = 1;
3862 if (var_present
3863 /* Symbol + offset should be compile-time computable. */
3864 && (symbol_present || offset))
3865 n_sums++;
3867 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3869 fallback:
3871 /* Just get the expression, expand it and measure the cost. */
3872 tree comp = get_computation_at (data->current_loop, use, cand, at);
3874 if (!comp)
3875 return INFTY;
3877 if (address_p)
3878 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3880 return computation_cost (comp);
3884 /* Determines the cost of the computation by that USE is expressed
3885 from induction variable CAND. If ADDRESS_P is true, we just need
3886 to create an address from it, otherwise we want to get it into
3887 register. A set of invariants we depend on is stored in
3888 DEPENDS_ON. */
3890 static unsigned
3891 get_computation_cost (struct ivopts_data *data,
3892 struct iv_use *use, struct iv_cand *cand,
3893 bool address_p, bitmap *depends_on)
3895 return get_computation_cost_at (data,
3896 use, cand, address_p, depends_on, use->stmt);
3899 /* Determines cost of basing replacement of USE on CAND in a generic
3900 expression. */
3902 static bool
3903 determine_use_iv_cost_generic (struct ivopts_data *data,
3904 struct iv_use *use, struct iv_cand *cand)
3906 bitmap depends_on;
3907 unsigned cost;
3909 /* The simple case first -- if we need to express value of the preserved
3910 original biv, the cost is 0. This also prevents us from counting the
3911 cost of increment twice -- once at this use and once in the cost of
3912 the candidate. */
3913 if (cand->pos == IP_ORIGINAL
3914 && cand->incremented_at == use->stmt)
3916 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3917 return true;
3920 cost = get_computation_cost (data, use, cand, false, &depends_on);
3921 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3923 return cost != INFTY;
3926 /* Determines cost of basing replacement of USE on CAND in an address. */
3928 static bool
3929 determine_use_iv_cost_address (struct ivopts_data *data,
3930 struct iv_use *use, struct iv_cand *cand)
3932 bitmap depends_on;
3933 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3935 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3937 return cost != INFTY;
3940 /* Computes value of induction variable IV in iteration NITER. */
3942 static tree
3943 iv_value (struct iv *iv, tree niter)
3945 tree val;
3946 tree type = TREE_TYPE (iv->base);
3948 niter = fold_convert (type, niter);
3949 val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3951 return fold_build2 (PLUS_EXPR, type, iv->base, val);
3954 /* Computes value of candidate CAND at position AT in iteration NITER. */
3956 static tree
3957 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3959 tree val = iv_value (cand->iv, niter);
3960 tree type = TREE_TYPE (cand->iv->base);
3962 if (stmt_after_increment (loop, cand, at))
3963 val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3965 return val;
3968 /* Returns period of induction variable iv. */
3970 static tree
3971 iv_period (struct iv *iv)
3973 tree step = iv->step, period, type;
3974 tree pow2div;
3976 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3978 /* Period of the iv is gcd (step, type range). Since type range is power
3979 of two, it suffices to determine the maximum power of two that divides
3980 step. */
3981 pow2div = num_ending_zeros (step);
3982 type = unsigned_type_for (TREE_TYPE (step));
3984 period = build_low_bits_mask (type,
3985 (TYPE_PRECISION (type)
3986 - tree_low_cst (pow2div, 1)));
3988 return period;
3991 /* Returns the comparison operator used when eliminating the iv USE. */
3993 static enum tree_code
3994 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3996 struct loop *loop = data->current_loop;
3997 basic_block ex_bb;
3998 edge exit;
4000 ex_bb = bb_for_stmt (use->stmt);
4001 exit = EDGE_SUCC (ex_bb, 0);
4002 if (flow_bb_inside_loop_p (loop, exit->dest))
4003 exit = EDGE_SUCC (ex_bb, 1);
4005 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4008 /* Check whether it is possible to express the condition in USE by comparison
4009 of candidate CAND. If so, store the value compared with to BOUND. */
4011 static bool
4012 may_eliminate_iv (struct ivopts_data *data,
4013 struct iv_use *use, struct iv_cand *cand, tree *bound)
4015 basic_block ex_bb;
4016 edge exit;
4017 struct tree_niter_desc *niter;
4018 tree nit, nit_type;
4019 tree wider_type, period, per_type;
4020 struct loop *loop = data->current_loop;
4022 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4023 return false;
4025 /* For now works only for exits that dominate the loop latch. TODO -- extend
4026 for other conditions inside loop body. */
4027 ex_bb = bb_for_stmt (use->stmt);
4028 if (use->stmt != last_stmt (ex_bb)
4029 || TREE_CODE (use->stmt) != COND_EXPR)
4030 return false;
4031 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4032 return false;
4034 exit = EDGE_SUCC (ex_bb, 0);
4035 if (flow_bb_inside_loop_p (loop, exit->dest))
4036 exit = EDGE_SUCC (ex_bb, 1);
4037 if (flow_bb_inside_loop_p (loop, exit->dest))
4038 return false;
4040 niter = niter_for_exit (data, exit);
4041 if (!niter
4042 || !zero_p (niter->may_be_zero))
4043 return false;
4045 nit = niter->niter;
4046 nit_type = TREE_TYPE (nit);
4048 /* Determine whether we may use the variable to test whether niter iterations
4049 elapsed. This is the case iff the period of the induction variable is
4050 greater than the number of iterations. */
4051 period = iv_period (cand->iv);
4052 if (!period)
4053 return false;
4054 per_type = TREE_TYPE (period);
4056 wider_type = TREE_TYPE (period);
4057 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4058 wider_type = per_type;
4059 else
4060 wider_type = nit_type;
4062 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4063 fold_convert (wider_type, period),
4064 fold_convert (wider_type, nit))))
4065 return false;
4067 *bound = cand_value_at (loop, cand, use->stmt, nit);
4068 return true;
4071 /* Determines cost of basing replacement of USE on CAND in a condition. */
4073 static bool
4074 determine_use_iv_cost_condition (struct ivopts_data *data,
4075 struct iv_use *use, struct iv_cand *cand)
4077 tree bound = NULL_TREE, op, cond;
4078 bitmap depends_on = NULL;
4079 unsigned cost;
4081 /* Only consider real candidates. */
4082 if (!cand->iv)
4084 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4085 return false;
4088 if (may_eliminate_iv (data, use, cand, &bound))
4090 cost = force_var_cost (data, bound, &depends_on);
4092 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4093 return cost != INFTY;
4096 /* The induction variable elimination failed; just express the original
4097 giv. If it is compared with an invariant, note that we cannot get
4098 rid of it. */
4099 cost = get_computation_cost (data, use, cand, false, &depends_on);
4101 cond = *use->op_p;
4102 if (TREE_CODE (cond) != SSA_NAME)
4104 op = TREE_OPERAND (cond, 0);
4105 if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4106 op = TREE_OPERAND (cond, 1);
4107 if (TREE_CODE (op) == SSA_NAME)
4109 op = get_iv (data, op)->base;
4110 fd_ivopts_data = data;
4111 walk_tree (&op, find_depends, &depends_on, NULL);
4115 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4116 return cost != INFTY;
4119 /* Checks whether it is possible to replace the final value of USE by
4120 a direct computation. If so, the formula is stored to *VALUE. */
4122 static bool
4123 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
4124 tree *value)
4126 struct loop *loop = data->current_loop;
4127 edge exit;
4128 struct tree_niter_desc *niter;
4130 exit = single_dom_exit (loop);
4131 if (!exit)
4132 return false;
4134 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
4135 bb_for_stmt (use->stmt)));
4137 niter = niter_for_single_dom_exit (data);
4138 if (!niter
4139 || !zero_p (niter->may_be_zero))
4140 return false;
4142 *value = iv_value (use->iv, niter->niter);
4144 return true;
4147 /* Determines cost of replacing final value of USE using CAND. */
4149 static bool
4150 determine_use_iv_cost_outer (struct ivopts_data *data,
4151 struct iv_use *use, struct iv_cand *cand)
4153 bitmap depends_on;
4154 unsigned cost;
4155 edge exit;
4156 tree value = NULL_TREE;
4157 struct loop *loop = data->current_loop;
4159 /* The simple case first -- if we need to express value of the preserved
4160 original biv, the cost is 0. This also prevents us from counting the
4161 cost of increment twice -- once at this use and once in the cost of
4162 the candidate. */
4163 if (cand->pos == IP_ORIGINAL
4164 && cand->incremented_at == use->stmt)
4166 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4167 return true;
4170 if (!cand->iv)
4172 if (!may_replace_final_value (data, use, &value))
4174 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4175 return false;
4178 depends_on = NULL;
4179 cost = force_var_cost (data, value, &depends_on);
4181 cost /= AVG_LOOP_NITER (loop);
4183 set_use_iv_cost (data, use, cand, cost, depends_on, value);
4184 return cost != INFTY;
4187 exit = single_dom_exit (loop);
4188 if (exit)
4190 /* If there is just a single exit, we may use value of the candidate
4191 after we take it to determine the value of use. */
4192 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
4193 last_stmt (exit->src));
4194 if (cost != INFTY)
4195 cost /= AVG_LOOP_NITER (loop);
4197 else
4199 /* Otherwise we just need to compute the iv. */
4200 cost = get_computation_cost (data, use, cand, false, &depends_on);
4203 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4205 return cost != INFTY;
4208 /* Determines cost of basing replacement of USE on CAND. Returns false
4209 if USE cannot be based on CAND. */
4211 static bool
4212 determine_use_iv_cost (struct ivopts_data *data,
4213 struct iv_use *use, struct iv_cand *cand)
4215 switch (use->type)
4217 case USE_NONLINEAR_EXPR:
4218 return determine_use_iv_cost_generic (data, use, cand);
4220 case USE_OUTER:
4221 return determine_use_iv_cost_outer (data, use, cand);
4223 case USE_ADDRESS:
4224 return determine_use_iv_cost_address (data, use, cand);
4226 case USE_COMPARE:
4227 return determine_use_iv_cost_condition (data, use, cand);
4229 default:
4230 gcc_unreachable ();
4234 /* Determines costs of basing the use of the iv on an iv candidate. */
4236 static void
4237 determine_use_iv_costs (struct ivopts_data *data)
4239 unsigned i, j;
4240 struct iv_use *use;
4241 struct iv_cand *cand;
4242 bitmap to_clear = BITMAP_ALLOC (NULL);
4244 alloc_use_cost_map (data);
4246 for (i = 0; i < n_iv_uses (data); i++)
4248 use = iv_use (data, i);
4250 if (data->consider_all_candidates)
4252 for (j = 0; j < n_iv_cands (data); j++)
4254 cand = iv_cand (data, j);
4255 determine_use_iv_cost (data, use, cand);
4258 else
4260 bitmap_iterator bi;
4262 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4264 cand = iv_cand (data, j);
4265 if (!determine_use_iv_cost (data, use, cand))
4266 bitmap_set_bit (to_clear, j);
4269 /* Remove the candidates for that the cost is infinite from
4270 the list of related candidates. */
4271 bitmap_and_compl_into (use->related_cands, to_clear);
4272 bitmap_clear (to_clear);
4276 BITMAP_FREE (to_clear);
4278 if (dump_file && (dump_flags & TDF_DETAILS))
4280 fprintf (dump_file, "Use-candidate costs:\n");
4282 for (i = 0; i < n_iv_uses (data); i++)
4284 use = iv_use (data, i);
4286 fprintf (dump_file, "Use %d:\n", i);
4287 fprintf (dump_file, " cand\tcost\tdepends on\n");
4288 for (j = 0; j < use->n_map_members; j++)
4290 if (!use->cost_map[j].cand
4291 || use->cost_map[j].cost == INFTY)
4292 continue;
4294 fprintf (dump_file, " %d\t%d\t",
4295 use->cost_map[j].cand->id,
4296 use->cost_map[j].cost);
4297 if (use->cost_map[j].depends_on)
4298 bitmap_print (dump_file,
4299 use->cost_map[j].depends_on, "","");
4300 fprintf (dump_file, "\n");
4303 fprintf (dump_file, "\n");
4305 fprintf (dump_file, "\n");
4309 /* Determines cost of the candidate CAND. */
4311 static void
4312 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4314 unsigned cost_base, cost_step;
4315 tree base;
4317 if (!cand->iv)
4319 cand->cost = 0;
4320 return;
4323 /* There are two costs associated with the candidate -- its increment
4324 and its initialization. The second is almost negligible for any loop
4325 that rolls enough, so we take it just very little into account. */
4327 base = cand->iv->base;
4328 cost_base = force_var_cost (data, base, NULL);
4329 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4331 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4333 /* Prefer the original iv unless we may gain something by replacing it;
4334 this is not really relevant for artificial ivs created by other
4335 passes. */
4336 if (cand->pos == IP_ORIGINAL
4337 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4338 cand->cost--;
4340 /* Prefer not to insert statements into latch unless there are some
4341 already (so that we do not create unnecessary jumps). */
4342 if (cand->pos == IP_END
4343 && empty_block_p (ip_end_pos (data->current_loop)))
4344 cand->cost++;
4347 /* Determines costs of computation of the candidates. */
4349 static void
4350 determine_iv_costs (struct ivopts_data *data)
4352 unsigned i;
4354 if (dump_file && (dump_flags & TDF_DETAILS))
4356 fprintf (dump_file, "Candidate costs:\n");
4357 fprintf (dump_file, " cand\tcost\n");
4360 for (i = 0; i < n_iv_cands (data); i++)
4362 struct iv_cand *cand = iv_cand (data, i);
4364 determine_iv_cost (data, cand);
4366 if (dump_file && (dump_flags & TDF_DETAILS))
4367 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4370 if (dump_file && (dump_flags & TDF_DETAILS))
4371 fprintf (dump_file, "\n");
4374 /* Calculates cost for having SIZE induction variables. */
4376 static unsigned
4377 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4379 return global_cost_for_size (size,
4380 loop_data (data->current_loop)->regs_used,
4381 n_iv_uses (data));
4384 /* For each size of the induction variable set determine the penalty. */
4386 static void
4387 determine_set_costs (struct ivopts_data *data)
4389 unsigned j, n;
4390 tree phi, op;
4391 struct loop *loop = data->current_loop;
4392 bitmap_iterator bi;
4394 /* We use the following model (definitely improvable, especially the
4395 cost function -- TODO):
4397 We estimate the number of registers available (using MD data), name it A.
4399 We estimate the number of registers used by the loop, name it U. This
4400 number is obtained as the number of loop phi nodes (not counting virtual
4401 registers and bivs) + the number of variables from outside of the loop.
4403 We set a reserve R (free regs that are used for temporary computations,
4404 etc.). For now the reserve is a constant 3.
4406 Let I be the number of induction variables.
4408 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4409 make a lot of ivs without a reason).
4410 -- if A - R < U + I <= A, the cost is I * PRES_COST
4411 -- if U + I > A, the cost is I * PRES_COST and
4412 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4414 if (dump_file && (dump_flags & TDF_DETAILS))
4416 fprintf (dump_file, "Global costs:\n");
4417 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4418 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
4419 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
4420 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
4423 n = 0;
4424 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4426 op = PHI_RESULT (phi);
4428 if (!is_gimple_reg (op))
4429 continue;
4431 if (get_iv (data, op))
4432 continue;
4434 n++;
4437 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4439 struct version_info *info = ver_info (data, j);
4441 if (info->inv_id && info->has_nonlin_use)
4442 n++;
4445 loop_data (loop)->regs_used = n;
4446 if (dump_file && (dump_flags & TDF_DETAILS))
4447 fprintf (dump_file, " regs_used %d\n", n);
4449 if (dump_file && (dump_flags & TDF_DETAILS))
4451 fprintf (dump_file, " cost for size:\n");
4452 fprintf (dump_file, " ivs\tcost\n");
4453 for (j = 0; j <= 2 * target_avail_regs; j++)
4454 fprintf (dump_file, " %d\t%d\n", j,
4455 ivopts_global_cost_for_size (data, j));
4456 fprintf (dump_file, "\n");
4460 /* Returns true if A is a cheaper cost pair than B. */
4462 static bool
4463 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4465 if (!a)
4466 return false;
4468 if (!b)
4469 return true;
4471 if (a->cost < b->cost)
4472 return true;
4474 if (a->cost > b->cost)
4475 return false;
4477 /* In case the costs are the same, prefer the cheaper candidate. */
4478 if (a->cand->cost < b->cand->cost)
4479 return true;
4481 return false;
4484 /* Computes the cost field of IVS structure. */
4486 static void
4487 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4489 unsigned cost = 0;
4491 cost += ivs->cand_use_cost;
4492 cost += ivs->cand_cost;
4493 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4495 ivs->cost = cost;
4498 /* Remove invariants in set INVS to set IVS. */
4500 static void
4501 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4503 bitmap_iterator bi;
4504 unsigned iid;
4506 if (!invs)
4507 return;
4509 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4511 ivs->n_invariant_uses[iid]--;
4512 if (ivs->n_invariant_uses[iid] == 0)
4513 ivs->n_regs--;
4517 /* Set USE not to be expressed by any candidate in IVS. */
4519 static void
4520 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4521 struct iv_use *use)
4523 unsigned uid = use->id, cid;
4524 struct cost_pair *cp;
4526 cp = ivs->cand_for_use[uid];
4527 if (!cp)
4528 return;
4529 cid = cp->cand->id;
4531 ivs->bad_uses++;
4532 ivs->cand_for_use[uid] = NULL;
4533 ivs->n_cand_uses[cid]--;
4535 if (ivs->n_cand_uses[cid] == 0)
4537 bitmap_clear_bit (ivs->cands, cid);
4538 /* Do not count the pseudocandidates. */
4539 if (cp->cand->iv)
4540 ivs->n_regs--;
4541 ivs->n_cands--;
4542 ivs->cand_cost -= cp->cand->cost;
4544 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4547 ivs->cand_use_cost -= cp->cost;
4549 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4550 iv_ca_recount_cost (data, ivs);
4553 /* Add invariants in set INVS to set IVS. */
4555 static void
4556 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4558 bitmap_iterator bi;
4559 unsigned iid;
4561 if (!invs)
4562 return;
4564 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4566 ivs->n_invariant_uses[iid]++;
4567 if (ivs->n_invariant_uses[iid] == 1)
4568 ivs->n_regs++;
4572 /* Set cost pair for USE in set IVS to CP. */
4574 static void
4575 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4576 struct iv_use *use, struct cost_pair *cp)
4578 unsigned uid = use->id, cid;
4580 if (ivs->cand_for_use[uid] == cp)
4581 return;
4583 if (ivs->cand_for_use[uid])
4584 iv_ca_set_no_cp (data, ivs, use);
4586 if (cp)
4588 cid = cp->cand->id;
4590 ivs->bad_uses--;
4591 ivs->cand_for_use[uid] = cp;
4592 ivs->n_cand_uses[cid]++;
4593 if (ivs->n_cand_uses[cid] == 1)
4595 bitmap_set_bit (ivs->cands, cid);
4596 /* Do not count the pseudocandidates. */
4597 if (cp->cand->iv)
4598 ivs->n_regs++;
4599 ivs->n_cands++;
4600 ivs->cand_cost += cp->cand->cost;
4602 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4605 ivs->cand_use_cost += cp->cost;
4606 iv_ca_set_add_invariants (ivs, cp->depends_on);
4607 iv_ca_recount_cost (data, ivs);
4611 /* Extend set IVS by expressing USE by some of the candidates in it
4612 if possible. */
4614 static void
4615 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4616 struct iv_use *use)
4618 struct cost_pair *best_cp = NULL, *cp;
4619 bitmap_iterator bi;
4620 unsigned i;
4622 gcc_assert (ivs->upto >= use->id);
4624 if (ivs->upto == use->id)
4626 ivs->upto++;
4627 ivs->bad_uses++;
4630 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4632 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4634 if (cheaper_cost_pair (cp, best_cp))
4635 best_cp = cp;
4638 iv_ca_set_cp (data, ivs, use, best_cp);
4641 /* Get cost for assignment IVS. */
4643 static unsigned
4644 iv_ca_cost (struct iv_ca *ivs)
4646 return (ivs->bad_uses ? INFTY : ivs->cost);
4649 /* Returns true if all dependences of CP are among invariants in IVS. */
4651 static bool
4652 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4654 unsigned i;
4655 bitmap_iterator bi;
4657 if (!cp->depends_on)
4658 return true;
4660 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4662 if (ivs->n_invariant_uses[i] == 0)
4663 return false;
4666 return true;
4669 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4670 it before NEXT_CHANGE. */
4672 static struct iv_ca_delta *
4673 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4674 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4676 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4678 change->use = use;
4679 change->old_cp = old_cp;
4680 change->new_cp = new_cp;
4681 change->next_change = next_change;
4683 return change;
4686 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4687 are rewritten. */
4689 static struct iv_ca_delta *
4690 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4692 struct iv_ca_delta *last;
4694 if (!l2)
4695 return l1;
4697 if (!l1)
4698 return l2;
4700 for (last = l1; last->next_change; last = last->next_change)
4701 continue;
4702 last->next_change = l2;
4704 return l1;
4707 /* Returns candidate by that USE is expressed in IVS. */
4709 static struct cost_pair *
4710 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4712 return ivs->cand_for_use[use->id];
4715 /* Reverse the list of changes DELTA, forming the inverse to it. */
4717 static struct iv_ca_delta *
4718 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4720 struct iv_ca_delta *act, *next, *prev = NULL;
4721 struct cost_pair *tmp;
4723 for (act = delta; act; act = next)
4725 next = act->next_change;
4726 act->next_change = prev;
4727 prev = act;
4729 tmp = act->old_cp;
4730 act->old_cp = act->new_cp;
4731 act->new_cp = tmp;
4734 return prev;
4737 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4738 reverted instead. */
4740 static void
4741 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4742 struct iv_ca_delta *delta, bool forward)
4744 struct cost_pair *from, *to;
4745 struct iv_ca_delta *act;
4747 if (!forward)
4748 delta = iv_ca_delta_reverse (delta);
4750 for (act = delta; act; act = act->next_change)
4752 from = act->old_cp;
4753 to = act->new_cp;
4754 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4755 iv_ca_set_cp (data, ivs, act->use, to);
4758 if (!forward)
4759 iv_ca_delta_reverse (delta);
4762 /* Returns true if CAND is used in IVS. */
4764 static bool
4765 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4767 return ivs->n_cand_uses[cand->id] > 0;
4770 /* Returns number of induction variable candidates in the set IVS. */
4772 static unsigned
4773 iv_ca_n_cands (struct iv_ca *ivs)
4775 return ivs->n_cands;
4778 /* Free the list of changes DELTA. */
4780 static void
4781 iv_ca_delta_free (struct iv_ca_delta **delta)
4783 struct iv_ca_delta *act, *next;
4785 for (act = *delta; act; act = next)
4787 next = act->next_change;
4788 free (act);
4791 *delta = NULL;
4794 /* Allocates new iv candidates assignment. */
4796 static struct iv_ca *
4797 iv_ca_new (struct ivopts_data *data)
4799 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4801 nw->upto = 0;
4802 nw->bad_uses = 0;
4803 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4804 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4805 nw->cands = BITMAP_ALLOC (NULL);
4806 nw->n_cands = 0;
4807 nw->n_regs = 0;
4808 nw->cand_use_cost = 0;
4809 nw->cand_cost = 0;
4810 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4811 nw->cost = 0;
4813 return nw;
4816 /* Free memory occupied by the set IVS. */
4818 static void
4819 iv_ca_free (struct iv_ca **ivs)
4821 free ((*ivs)->cand_for_use);
4822 free ((*ivs)->n_cand_uses);
4823 BITMAP_FREE ((*ivs)->cands);
4824 free ((*ivs)->n_invariant_uses);
4825 free (*ivs);
4826 *ivs = NULL;
4829 /* Dumps IVS to FILE. */
4831 static void
4832 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4834 const char *pref = " invariants ";
4835 unsigned i;
4837 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4838 bitmap_print (file, ivs->cands, " candidates ","\n");
4840 for (i = 1; i <= data->max_inv_id; i++)
4841 if (ivs->n_invariant_uses[i])
4843 fprintf (file, "%s%d", pref, i);
4844 pref = ", ";
4846 fprintf (file, "\n");
4849 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4850 new set, and store differences in DELTA. Number of induction variables
4851 in the new set is stored to N_IVS. */
4853 static unsigned
4854 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4855 struct iv_cand *cand, struct iv_ca_delta **delta,
4856 unsigned *n_ivs)
4858 unsigned i, cost;
4859 struct iv_use *use;
4860 struct cost_pair *old_cp, *new_cp;
4862 *delta = NULL;
4863 for (i = 0; i < ivs->upto; i++)
4865 use = iv_use (data, i);
4866 old_cp = iv_ca_cand_for_use (ivs, use);
4868 if (old_cp
4869 && old_cp->cand == cand)
4870 continue;
4872 new_cp = get_use_iv_cost (data, use, cand);
4873 if (!new_cp)
4874 continue;
4876 if (!iv_ca_has_deps (ivs, new_cp))
4877 continue;
4879 if (!cheaper_cost_pair (new_cp, old_cp))
4880 continue;
4882 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4885 iv_ca_delta_commit (data, ivs, *delta, true);
4886 cost = iv_ca_cost (ivs);
4887 if (n_ivs)
4888 *n_ivs = iv_ca_n_cands (ivs);
4889 iv_ca_delta_commit (data, ivs, *delta, false);
4891 return cost;
4894 /* Try narrowing set IVS by removing CAND. Return the cost of
4895 the new set and store the differences in DELTA. */
4897 static unsigned
4898 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4899 struct iv_cand *cand, struct iv_ca_delta **delta)
4901 unsigned i, ci;
4902 struct iv_use *use;
4903 struct cost_pair *old_cp, *new_cp, *cp;
4904 bitmap_iterator bi;
4905 struct iv_cand *cnd;
4906 unsigned cost;
4908 *delta = NULL;
4909 for (i = 0; i < n_iv_uses (data); i++)
4911 use = iv_use (data, i);
4913 old_cp = iv_ca_cand_for_use (ivs, use);
4914 if (old_cp->cand != cand)
4915 continue;
4917 new_cp = NULL;
4919 if (data->consider_all_candidates)
4921 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4923 if (ci == cand->id)
4924 continue;
4926 cnd = iv_cand (data, ci);
4928 cp = get_use_iv_cost (data, use, cnd);
4929 if (!cp)
4930 continue;
4931 if (!iv_ca_has_deps (ivs, cp))
4932 continue;
4934 if (!cheaper_cost_pair (cp, new_cp))
4935 continue;
4937 new_cp = cp;
4940 else
4942 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4944 if (ci == cand->id)
4945 continue;
4947 cnd = iv_cand (data, ci);
4949 cp = get_use_iv_cost (data, use, cnd);
4950 if (!cp)
4951 continue;
4952 if (!iv_ca_has_deps (ivs, cp))
4953 continue;
4955 if (!cheaper_cost_pair (cp, new_cp))
4956 continue;
4958 new_cp = cp;
4962 if (!new_cp)
4964 iv_ca_delta_free (delta);
4965 return INFTY;
4968 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4971 iv_ca_delta_commit (data, ivs, *delta, true);
4972 cost = iv_ca_cost (ivs);
4973 iv_ca_delta_commit (data, ivs, *delta, false);
4975 return cost;
4978 /* Try optimizing the set of candidates IVS by removing candidates different
4979 from to EXCEPT_CAND from it. Return cost of the new set, and store
4980 differences in DELTA. */
4982 static unsigned
4983 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4984 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4986 bitmap_iterator bi;
4987 struct iv_ca_delta *act_delta, *best_delta;
4988 unsigned i, best_cost, acost;
4989 struct iv_cand *cand;
4991 best_delta = NULL;
4992 best_cost = iv_ca_cost (ivs);
4994 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4996 cand = iv_cand (data, i);
4998 if (cand == except_cand)
4999 continue;
5001 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5003 if (acost < best_cost)
5005 best_cost = acost;
5006 iv_ca_delta_free (&best_delta);
5007 best_delta = act_delta;
5009 else
5010 iv_ca_delta_free (&act_delta);
5013 if (!best_delta)
5015 *delta = NULL;
5016 return best_cost;
5019 /* Recurse to possibly remove other unnecessary ivs. */
5020 iv_ca_delta_commit (data, ivs, best_delta, true);
5021 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5022 iv_ca_delta_commit (data, ivs, best_delta, false);
5023 *delta = iv_ca_delta_join (best_delta, *delta);
5024 return best_cost;
5027 /* Tries to extend the sets IVS in the best possible way in order
5028 to express the USE. */
5030 static bool
5031 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5032 struct iv_use *use)
5034 unsigned best_cost, act_cost;
5035 unsigned i;
5036 bitmap_iterator bi;
5037 struct iv_cand *cand;
5038 struct iv_ca_delta *best_delta = NULL, *act_delta;
5039 struct cost_pair *cp;
5041 iv_ca_add_use (data, ivs, use);
5042 best_cost = iv_ca_cost (ivs);
5044 cp = iv_ca_cand_for_use (ivs, use);
5045 if (cp)
5047 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5048 iv_ca_set_no_cp (data, ivs, use);
5051 /* First try important candidates. Only if it fails, try the specific ones.
5052 Rationale -- in loops with many variables the best choice often is to use
5053 just one generic biv. If we added here many ivs specific to the uses,
5054 the optimization algorithm later would be likely to get stuck in a local
5055 minimum, thus causing us to create too many ivs. The approach from
5056 few ivs to more seems more likely to be successful -- starting from few
5057 ivs, replacing an expensive use by a specific iv should always be a
5058 win. */
5059 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5061 cand = iv_cand (data, i);
5063 if (iv_ca_cand_used_p (ivs, cand))
5064 continue;
5066 cp = get_use_iv_cost (data, use, cand);
5067 if (!cp)
5068 continue;
5070 iv_ca_set_cp (data, ivs, use, cp);
5071 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5072 iv_ca_set_no_cp (data, ivs, use);
5073 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5075 if (act_cost < best_cost)
5077 best_cost = act_cost;
5079 iv_ca_delta_free (&best_delta);
5080 best_delta = act_delta;
5082 else
5083 iv_ca_delta_free (&act_delta);
5086 if (best_cost == INFTY)
5088 for (i = 0; i < use->n_map_members; i++)
5090 cp = use->cost_map + i;
5091 cand = cp->cand;
5092 if (!cand)
5093 continue;
5095 /* Already tried this. */
5096 if (cand->important)
5097 continue;
5099 if (iv_ca_cand_used_p (ivs, cand))
5100 continue;
5102 act_delta = NULL;
5103 iv_ca_set_cp (data, ivs, use, cp);
5104 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5105 iv_ca_set_no_cp (data, ivs, use);
5106 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5107 cp, act_delta);
5109 if (act_cost < best_cost)
5111 best_cost = act_cost;
5113 if (best_delta)
5114 iv_ca_delta_free (&best_delta);
5115 best_delta = act_delta;
5117 else
5118 iv_ca_delta_free (&act_delta);
5122 iv_ca_delta_commit (data, ivs, best_delta, true);
5123 iv_ca_delta_free (&best_delta);
5125 return (best_cost != INFTY);
5128 /* Finds an initial assignment of candidates to uses. */
5130 static struct iv_ca *
5131 get_initial_solution (struct ivopts_data *data)
5133 struct iv_ca *ivs = iv_ca_new (data);
5134 unsigned i;
5136 for (i = 0; i < n_iv_uses (data); i++)
5137 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5139 iv_ca_free (&ivs);
5140 return NULL;
5143 return ivs;
5146 /* Tries to improve set of induction variables IVS. */
5148 static bool
5149 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5151 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5152 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5153 struct iv_cand *cand;
5155 /* Try extending the set of induction variables by one. */
5156 for (i = 0; i < n_iv_cands (data); i++)
5158 cand = iv_cand (data, i);
5160 if (iv_ca_cand_used_p (ivs, cand))
5161 continue;
5163 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5164 if (!act_delta)
5165 continue;
5167 /* If we successfully added the candidate and the set is small enough,
5168 try optimizing it by removing other candidates. */
5169 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5171 iv_ca_delta_commit (data, ivs, act_delta, true);
5172 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5173 iv_ca_delta_commit (data, ivs, act_delta, false);
5174 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5177 if (acost < best_cost)
5179 best_cost = acost;
5180 iv_ca_delta_free (&best_delta);
5181 best_delta = act_delta;
5183 else
5184 iv_ca_delta_free (&act_delta);
5187 if (!best_delta)
5189 /* Try removing the candidates from the set instead. */
5190 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5192 /* Nothing more we can do. */
5193 if (!best_delta)
5194 return false;
5197 iv_ca_delta_commit (data, ivs, best_delta, true);
5198 gcc_assert (best_cost == iv_ca_cost (ivs));
5199 iv_ca_delta_free (&best_delta);
5200 return true;
5203 /* Attempts to find the optimal set of induction variables. We do simple
5204 greedy heuristic -- we try to replace at most one candidate in the selected
5205 solution and remove the unused ivs while this improves the cost. */
5207 static struct iv_ca *
5208 find_optimal_iv_set (struct ivopts_data *data)
5210 unsigned i;
5211 struct iv_ca *set;
5212 struct iv_use *use;
5214 /* Get the initial solution. */
5215 set = get_initial_solution (data);
5216 if (!set)
5218 if (dump_file && (dump_flags & TDF_DETAILS))
5219 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5220 return NULL;
5223 if (dump_file && (dump_flags & TDF_DETAILS))
5225 fprintf (dump_file, "Initial set of candidates:\n");
5226 iv_ca_dump (data, dump_file, set);
5229 while (try_improve_iv_set (data, set))
5231 if (dump_file && (dump_flags & TDF_DETAILS))
5233 fprintf (dump_file, "Improved to:\n");
5234 iv_ca_dump (data, dump_file, set);
5238 if (dump_file && (dump_flags & TDF_DETAILS))
5239 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5241 for (i = 0; i < n_iv_uses (data); i++)
5243 use = iv_use (data, i);
5244 use->selected = iv_ca_cand_for_use (set, use)->cand;
5247 return set;
5250 /* Creates a new induction variable corresponding to CAND. */
5252 static void
5253 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5255 block_stmt_iterator incr_pos;
5256 tree base;
5257 bool after = false;
5259 if (!cand->iv)
5260 return;
5262 switch (cand->pos)
5264 case IP_NORMAL:
5265 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5266 break;
5268 case IP_END:
5269 incr_pos = bsi_last (ip_end_pos (data->current_loop));
5270 after = true;
5271 break;
5273 case IP_ORIGINAL:
5274 /* Mark that the iv is preserved. */
5275 name_info (data, cand->var_before)->preserve_biv = true;
5276 name_info (data, cand->var_after)->preserve_biv = true;
5278 /* Rewrite the increment so that it uses var_before directly. */
5279 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5281 return;
5284 gimple_add_tmp_var (cand->var_before);
5285 add_referenced_tmp_var (cand->var_before);
5287 base = unshare_expr (cand->iv->base);
5289 create_iv (base, unshare_expr (cand->iv->step),
5290 cand->var_before, data->current_loop,
5291 &incr_pos, after, &cand->var_before, &cand->var_after);
5294 /* Creates new induction variables described in SET. */
5296 static void
5297 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5299 unsigned i;
5300 struct iv_cand *cand;
5301 bitmap_iterator bi;
5303 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5305 cand = iv_cand (data, i);
5306 create_new_iv (data, cand);
5310 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5311 is true, remove also the ssa name defined by the statement. */
5313 static void
5314 remove_statement (tree stmt, bool including_defined_name)
5316 if (TREE_CODE (stmt) == PHI_NODE)
5318 if (!including_defined_name)
5320 /* Prevent the ssa name defined by the statement from being removed. */
5321 SET_PHI_RESULT (stmt, NULL);
5323 remove_phi_node (stmt, NULL_TREE);
5325 else
5327 block_stmt_iterator bsi = bsi_for_stmt (stmt);
5329 bsi_remove (&bsi);
5333 /* Rewrites USE (definition of iv used in a nonlinear expression)
5334 using candidate CAND. */
5336 static void
5337 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5338 struct iv_use *use, struct iv_cand *cand)
5340 tree comp;
5341 tree op, stmts, tgt, ass;
5342 block_stmt_iterator bsi, pbsi;
5344 /* An important special case -- if we are asked to express value of
5345 the original iv by itself, just exit; there is no need to
5346 introduce a new computation (that might also need casting the
5347 variable to unsigned and back). */
5348 if (cand->pos == IP_ORIGINAL
5349 && cand->incremented_at == use->stmt)
5351 tree step, ctype, utype;
5352 enum tree_code incr_code = PLUS_EXPR;
5354 gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
5355 gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);
5357 step = cand->iv->step;
5358 ctype = TREE_TYPE (step);
5359 utype = TREE_TYPE (cand->var_after);
5360 if (TREE_CODE (step) == NEGATE_EXPR)
5362 incr_code = MINUS_EXPR;
5363 step = TREE_OPERAND (step, 0);
5366 /* Check whether we may leave the computation unchanged.
5367 This is the case only if it does not rely on other
5368 computations in the loop -- otherwise, the computation
5369 we rely upon may be removed in remove_unused_ivs,
5370 thus leading to ICE. */
5371 op = TREE_OPERAND (use->stmt, 1);
5372 if (TREE_CODE (op) == PLUS_EXPR
5373 || TREE_CODE (op) == MINUS_EXPR)
5375 if (TREE_OPERAND (op, 0) == cand->var_before)
5376 op = TREE_OPERAND (op, 1);
5377 else if (TREE_CODE (op) == PLUS_EXPR
5378 && TREE_OPERAND (op, 1) == cand->var_before)
5379 op = TREE_OPERAND (op, 0);
5380 else
5381 op = NULL_TREE;
5383 else
5384 op = NULL_TREE;
5386 if (op
5387 && (TREE_CODE (op) == INTEGER_CST
5388 || operand_equal_p (op, step, 0)))
5389 return;
5391 /* Otherwise, add the necessary computations to express
5392 the iv. */
5393 op = fold_convert (ctype, cand->var_before);
5394 comp = fold_convert (utype,
5395 build2 (incr_code, ctype, op,
5396 unshare_expr (step)));
5398 else
5399 comp = get_computation (data->current_loop, use, cand);
5401 switch (TREE_CODE (use->stmt))
5403 case PHI_NODE:
5404 tgt = PHI_RESULT (use->stmt);
5406 /* If we should keep the biv, do not replace it. */
5407 if (name_info (data, tgt)->preserve_biv)
5408 return;
5410 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5411 while (!bsi_end_p (pbsi)
5412 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5414 bsi = pbsi;
5415 bsi_next (&pbsi);
5417 break;
5419 case MODIFY_EXPR:
5420 tgt = TREE_OPERAND (use->stmt, 0);
5421 bsi = bsi_for_stmt (use->stmt);
5422 break;
5424 default:
5425 gcc_unreachable ();
5428 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5430 if (TREE_CODE (use->stmt) == PHI_NODE)
5432 if (stmts)
5433 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5434 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5435 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5436 remove_statement (use->stmt, false);
5437 SSA_NAME_DEF_STMT (tgt) = ass;
5439 else
5441 if (stmts)
5442 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5443 TREE_OPERAND (use->stmt, 1) = op;
5447 /* Replaces ssa name in index IDX by its basic variable. Callback for
5448 for_each_index. */
5450 static bool
5451 idx_remove_ssa_names (tree base, tree *idx,
5452 void *data ATTRIBUTE_UNUSED)
5454 tree *op;
5456 if (TREE_CODE (*idx) == SSA_NAME)
5457 *idx = SSA_NAME_VAR (*idx);
5459 if (TREE_CODE (base) == ARRAY_REF)
5461 op = &TREE_OPERAND (base, 2);
5462 if (*op
5463 && TREE_CODE (*op) == SSA_NAME)
5464 *op = SSA_NAME_VAR (*op);
5465 op = &TREE_OPERAND (base, 3);
5466 if (*op
5467 && TREE_CODE (*op) == SSA_NAME)
5468 *op = SSA_NAME_VAR (*op);
5471 return true;
5474 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5476 static tree
5477 unshare_and_remove_ssa_names (tree ref)
5479 ref = unshare_expr (ref);
5480 for_each_index (&ref, idx_remove_ssa_names, NULL);
5482 return ref;
5485 /* Extract the alias analysis info for the memory reference REF. There are
5486 several ways how this information may be stored and what precisely is
5487 its semantics depending on the type of the reference, but there always is
5488 somewhere hidden one _DECL node that is used to determine the set of
5489 virtual operands for the reference. The code below deciphers this jungle
5490 and extracts this single useful piece of information. */
5492 static tree
5493 get_ref_tag (tree ref)
5495 tree var = get_base_address (ref);
5496 tree tag;
5498 if (!var)
5499 return NULL_TREE;
5501 if (TREE_CODE (var) == INDIRECT_REF)
5503 /* In case the base is a dereference of a pointer, first check its name
5504 mem tag, and if it does not have one, use type mem tag. */
5505 var = TREE_OPERAND (var, 0);
5506 if (TREE_CODE (var) != SSA_NAME)
5507 return NULL_TREE;
5509 if (SSA_NAME_PTR_INFO (var))
5511 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5512 if (tag)
5513 return tag;
5516 var = SSA_NAME_VAR (var);
5517 tag = var_ann (var)->type_mem_tag;
5518 gcc_assert (tag != NULL_TREE);
5519 return tag;
5521 else
5523 if (!DECL_P (var))
5524 return NULL_TREE;
5526 tag = var_ann (var)->type_mem_tag;
5527 if (tag)
5528 return tag;
5530 return var;
5534 /* Copies the reference information from OLD_REF to NEW_REF. */
5536 static void
5537 copy_ref_info (tree new_ref, tree old_ref)
5539 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5540 copy_mem_ref_info (new_ref, old_ref);
5541 else
5543 TMR_TAG (new_ref) = get_ref_tag (old_ref);
5544 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5548 /* Rewrites USE (address that is an iv) using candidate CAND. */
5550 static void
5551 rewrite_use_address (struct ivopts_data *data,
5552 struct iv_use *use, struct iv_cand *cand)
5554 struct affine_tree_combination aff;
5555 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5556 tree ref;
5558 get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5559 unshare_aff_combination (&aff);
5561 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5562 copy_ref_info (ref, *use->op_p);
5563 *use->op_p = ref;
5566 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5567 candidate CAND. */
5569 static void
5570 rewrite_use_compare (struct ivopts_data *data,
5571 struct iv_use *use, struct iv_cand *cand)
5573 tree comp;
5574 tree *op_p, cond, op, stmts, bound;
5575 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5576 enum tree_code compare;
5577 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5579 bound = cp->value;
5580 if (bound)
5582 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5583 tree var_type = TREE_TYPE (var);
5585 compare = iv_elimination_compare (data, use);
5586 bound = fold_convert (var_type, bound);
5587 op = force_gimple_operand (unshare_expr (bound), &stmts,
5588 true, NULL_TREE);
5590 if (stmts)
5591 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5593 *use->op_p = build2 (compare, boolean_type_node, var, op);
5594 update_stmt (use->stmt);
5595 return;
5598 /* The induction variable elimination failed; just express the original
5599 giv. */
5600 comp = get_computation (data->current_loop, use, cand);
5602 cond = *use->op_p;
5603 op_p = &TREE_OPERAND (cond, 0);
5604 if (TREE_CODE (*op_p) != SSA_NAME
5605 || zero_p (get_iv (data, *op_p)->step))
5606 op_p = &TREE_OPERAND (cond, 1);
5608 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5609 if (stmts)
5610 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5612 *op_p = op;
5615 /* Ensure that operand *OP_P may be used at the end of EXIT without
5616 violating loop closed ssa form. */
5618 static void
5619 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5621 basic_block def_bb;
5622 struct loop *def_loop;
5623 tree phi, use;
5625 use = USE_FROM_PTR (op_p);
5626 if (TREE_CODE (use) != SSA_NAME)
5627 return;
5629 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5630 if (!def_bb)
5631 return;
5633 def_loop = def_bb->loop_father;
5634 if (flow_bb_inside_loop_p (def_loop, exit->dest))
5635 return;
5637 /* Try finding a phi node that copies the value out of the loop. */
5638 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5639 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5640 break;
5642 if (!phi)
5644 /* Create such a phi node. */
5645 tree new_name = duplicate_ssa_name (use, NULL);
5647 phi = create_phi_node (new_name, exit->dest);
5648 SSA_NAME_DEF_STMT (new_name) = phi;
5649 add_phi_arg (phi, use, exit);
5652 SET_USE (op_p, PHI_RESULT (phi));
5655 /* Ensure that operands of STMT may be used at the end of EXIT without
5656 violating loop closed ssa form. */
5658 static void
5659 protect_loop_closed_ssa_form (edge exit, tree stmt)
5661 ssa_op_iter iter;
5662 use_operand_p use_p;
5664 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5665 protect_loop_closed_ssa_form_use (exit, use_p);
5668 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
5669 so that they are emitted on the correct place, and so that the loop closed
5670 ssa form is preserved. */
5672 void
5673 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5675 tree_stmt_iterator tsi;
5676 block_stmt_iterator bsi;
5677 tree phi, stmt, def, next;
5679 if (!single_pred_p (exit->dest))
5680 split_loop_exit_edge (exit);
5682 /* Ensure there is label in exit->dest, so that we can
5683 insert after it. */
5684 tree_block_label (exit->dest);
5685 bsi = bsi_after_labels (exit->dest);
5687 if (TREE_CODE (stmts) == STATEMENT_LIST)
5689 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5691 bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
5692 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5695 else
5697 bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
5698 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5701 if (!op)
5702 return;
5704 for (phi = phi_nodes (exit->dest); phi; phi = next)
5706 next = PHI_CHAIN (phi);
5708 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5710 def = PHI_RESULT (phi);
5711 remove_statement (phi, false);
5712 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5713 def, op);
5714 SSA_NAME_DEF_STMT (def) = stmt;
5715 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5720 /* Rewrites the final value of USE (that is only needed outside of the loop)
5721 using candidate CAND. */
5723 static void
5724 rewrite_use_outer (struct ivopts_data *data,
5725 struct iv_use *use, struct iv_cand *cand)
5727 edge exit;
5728 tree value, op, stmts, tgt;
5729 tree phi;
5731 switch (TREE_CODE (use->stmt))
5733 case PHI_NODE:
5734 tgt = PHI_RESULT (use->stmt);
5735 break;
5736 case MODIFY_EXPR:
5737 tgt = TREE_OPERAND (use->stmt, 0);
5738 break;
5739 default:
5740 gcc_unreachable ();
5743 exit = single_dom_exit (data->current_loop);
5745 if (exit)
5747 if (!cand->iv)
5749 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5750 value = unshare_expr (cp->value);
5752 else
5753 value = get_computation_at (data->current_loop,
5754 use, cand, last_stmt (exit->src));
5756 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5758 /* If we will preserve the iv anyway and we would need to perform
5759 some computation to replace the final value, do nothing. */
5760 if (stmts && name_info (data, tgt)->preserve_biv)
5761 return;
5763 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5765 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5767 if (USE_FROM_PTR (use_p) == tgt)
5768 SET_USE (use_p, op);
5771 if (stmts)
5772 compute_phi_arg_on_exit (exit, stmts, op);
5774 /* Enable removal of the statement. We cannot remove it directly,
5775 since we may still need the aliasing information attached to the
5776 ssa name defined by it. */
5777 name_info (data, tgt)->iv->have_use_for = false;
5778 return;
5781 /* If the variable is going to be preserved anyway, there is nothing to
5782 do. */
5783 if (name_info (data, tgt)->preserve_biv)
5784 return;
5786 /* Otherwise we just need to compute the iv. */
5787 rewrite_use_nonlinear_expr (data, use, cand);
5790 /* Rewrites USE using candidate CAND. */
5792 static void
5793 rewrite_use (struct ivopts_data *data,
5794 struct iv_use *use, struct iv_cand *cand)
5796 switch (use->type)
5798 case USE_NONLINEAR_EXPR:
5799 rewrite_use_nonlinear_expr (data, use, cand);
5800 break;
5802 case USE_OUTER:
5803 rewrite_use_outer (data, use, cand);
5804 break;
5806 case USE_ADDRESS:
5807 rewrite_use_address (data, use, cand);
5808 break;
5810 case USE_COMPARE:
5811 rewrite_use_compare (data, use, cand);
5812 break;
5814 default:
5815 gcc_unreachable ();
5817 update_stmt (use->stmt);
5820 /* Rewrite the uses using the selected induction variables. */
5822 static void
5823 rewrite_uses (struct ivopts_data *data)
5825 unsigned i;
5826 struct iv_cand *cand;
5827 struct iv_use *use;
5829 for (i = 0; i < n_iv_uses (data); i++)
5831 use = iv_use (data, i);
5832 cand = use->selected;
5833 gcc_assert (cand);
5835 rewrite_use (data, use, cand);
5839 /* Removes the ivs that are not used after rewriting. */
5841 static void
5842 remove_unused_ivs (struct ivopts_data *data)
5844 unsigned j;
5845 bitmap_iterator bi;
5847 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5849 struct version_info *info;
5851 info = ver_info (data, j);
5852 if (info->iv
5853 && !zero_p (info->iv->step)
5854 && !info->inv_id
5855 && !info->iv->have_use_for
5856 && !info->preserve_biv)
5857 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5861 /* Frees data allocated by the optimization of a single loop. */
5863 static void
5864 free_loop_data (struct ivopts_data *data)
5866 unsigned i, j;
5867 bitmap_iterator bi;
5868 tree obj;
5870 htab_empty (data->niters);
5872 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5874 struct version_info *info;
5876 info = ver_info (data, i);
5877 if (info->iv)
5878 free (info->iv);
5879 info->iv = NULL;
5880 info->has_nonlin_use = false;
5881 info->preserve_biv = false;
5882 info->inv_id = 0;
5884 bitmap_clear (data->relevant);
5885 bitmap_clear (data->important_candidates);
5887 for (i = 0; i < n_iv_uses (data); i++)
5889 struct iv_use *use = iv_use (data, i);
5891 free (use->iv);
5892 BITMAP_FREE (use->related_cands);
5893 for (j = 0; j < use->n_map_members; j++)
5894 if (use->cost_map[j].depends_on)
5895 BITMAP_FREE (use->cost_map[j].depends_on);
5896 free (use->cost_map);
5897 free (use);
5899 VEC_truncate (iv_use_p, data->iv_uses, 0);
5901 for (i = 0; i < n_iv_cands (data); i++)
5903 struct iv_cand *cand = iv_cand (data, i);
5905 if (cand->iv)
5906 free (cand->iv);
5907 if (cand->depends_on)
5908 BITMAP_FREE (cand->depends_on);
5909 free (cand);
5911 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5913 if (data->version_info_size < num_ssa_names)
5915 data->version_info_size = 2 * num_ssa_names;
5916 free (data->version_info);
5917 data->version_info = xcalloc (data->version_info_size,
5918 sizeof (struct version_info));
5921 data->max_inv_id = 0;
5923 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5924 SET_DECL_RTL (obj, NULL_RTX);
5926 VEC_truncate (tree, decl_rtl_to_reset, 0);
5929 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5930 loop tree. */
5932 static void
5933 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5935 unsigned i;
5937 for (i = 1; i < loops->num; i++)
5938 if (loops->parray[i])
5940 free (loops->parray[i]->aux);
5941 loops->parray[i]->aux = NULL;
5944 free_loop_data (data);
5945 free (data->version_info);
5946 BITMAP_FREE (data->relevant);
5947 BITMAP_FREE (data->important_candidates);
5948 htab_delete (data->niters);
5950 VEC_free (tree, heap, decl_rtl_to_reset);
5951 VEC_free (iv_use_p, heap, data->iv_uses);
5952 VEC_free (iv_cand_p, heap, data->iv_candidates);
5955 /* Optimizes the LOOP. Returns true if anything changed. */
5957 static bool
5958 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5960 bool changed = false;
5961 struct iv_ca *iv_ca;
5962 edge exit;
5964 data->current_loop = loop;
5966 if (dump_file && (dump_flags & TDF_DETAILS))
5968 fprintf (dump_file, "Processing loop %d\n", loop->num);
5970 exit = single_dom_exit (loop);
5971 if (exit)
5973 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5974 exit->src->index, exit->dest->index);
5975 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5976 fprintf (dump_file, "\n");
5979 fprintf (dump_file, "\n");
5982 /* For each ssa name determines whether it behaves as an induction variable
5983 in some loop. */
5984 if (!find_induction_variables (data))
5985 goto finish;
5987 /* Finds interesting uses (item 1). */
5988 find_interesting_uses (data);
5989 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5990 goto finish;
5992 /* Finds candidates for the induction variables (item 2). */
5993 find_iv_candidates (data);
5995 /* Calculates the costs (item 3, part 1). */
5996 determine_use_iv_costs (data);
5997 determine_iv_costs (data);
5998 determine_set_costs (data);
6000 /* Find the optimal set of induction variables (item 3, part 2). */
6001 iv_ca = find_optimal_iv_set (data);
6002 if (!iv_ca)
6003 goto finish;
6004 changed = true;
6006 /* Create the new induction variables (item 4, part 1). */
6007 create_new_ivs (data, iv_ca);
6008 iv_ca_free (&iv_ca);
6010 /* Rewrite the uses (item 4, part 2). */
6011 rewrite_uses (data);
6013 /* Remove the ivs that are unused after rewriting. */
6014 remove_unused_ivs (data);
6016 /* We have changed the structure of induction variables; it might happen
6017 that definitions in the scev database refer to some of them that were
6018 eliminated. */
6019 scev_reset ();
6021 finish:
6022 free_loop_data (data);
6024 return changed;
6027 /* Main entry point. Optimizes induction variables in LOOPS. */
6029 void
6030 tree_ssa_iv_optimize (struct loops *loops)
6032 struct loop *loop;
6033 struct ivopts_data data;
6035 tree_ssa_iv_optimize_init (loops, &data);
6037 /* Optimize the loops starting with the innermost ones. */
6038 loop = loops->tree_root;
6039 while (loop->inner)
6040 loop = loop->inner;
6042 /* Scan the loops, inner ones first. */
6043 while (loop != loops->tree_root)
6045 if (dump_file && (dump_flags & TDF_DETAILS))
6046 flow_loop_dump (loop, dump_file, NULL, 1);
6048 tree_ssa_iv_optimize_loop (&data, loop);
6050 if (loop->next)
6052 loop = loop->next;
6053 while (loop->inner)
6054 loop = loop->inner;
6056 else
6057 loop = loop->outer;
6060 tree_ssa_iv_optimize_finalize (loops, &data);