2005-07-07 Adrian Straetling <straetling@de.ibm.com>
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blob8e6b8c168d41cf64bdb23ae75b928de32116e2fa
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301, USA. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
91 #include "langhooks.h"
93 /* The infinite cost. */
94 #define INFTY 10000000
96 /* The expected number of loop iterations. TODO -- use profiling instead of
97 this. */
98 #define AVG_LOOP_NITER(LOOP) 5
101 /* Representation of the induction variable. */
102 struct iv
104 tree base; /* Initial value of the iv. */
105 tree base_object; /* A memory object to that the induction variable points. */
106 tree step; /* Step of the iv (constant only). */
107 tree ssa_name; /* The ssa name with the value. */
108 bool biv_p; /* Is it a biv? */
109 bool have_use_for; /* Do we already have a use for it? */
110 unsigned use_id; /* The identifier in the use if it is the case. */
113 /* Per-ssa version information (induction variable descriptions, etc.). */
114 struct version_info
116 tree name; /* The ssa name. */
117 struct iv *iv; /* Induction variable description. */
118 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id; /* Id of an invariant. */
121 bool preserve_biv; /* For the original biv, whether to preserve it. */
124 /* Information attached to loop. */
125 struct loop_data
127 unsigned regs_used; /* Number of registers used. */
130 /* Types of uses. */
131 enum use_type
133 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
134 USE_OUTER, /* The induction variable is used outside the loop. */
135 USE_ADDRESS, /* Use in an address. */
136 USE_COMPARE /* Use is a compare. */
139 /* The candidate - cost pair. */
140 struct cost_pair
142 struct iv_cand *cand; /* The candidate. */
143 unsigned cost; /* The cost. */
144 bitmap depends_on; /* The list of invariants that have to be
145 preserved. */
146 tree value; /* For final value elimination, the expression for
147 the final value of the iv. For iv elimination,
148 the new bound to compare with. */
151 /* Use. */
152 struct iv_use
154 unsigned id; /* The id of the use. */
155 enum use_type type; /* Type of the use. */
156 struct iv *iv; /* The induction variable it is based on. */
157 tree stmt; /* Statement in that it occurs. */
158 tree *op_p; /* The place where it occurs. */
159 bitmap related_cands; /* The set of "related" iv candidates, plus the common
160 important ones. */
162 unsigned n_map_members; /* Number of candidates in the cost_map list. */
163 struct cost_pair *cost_map;
164 /* The costs wrto the iv candidates. */
166 struct iv_cand *selected;
167 /* The selected candidate. */
170 /* The position where the iv is computed. */
171 enum iv_position
173 IP_NORMAL, /* At the end, just before the exit condition. */
174 IP_END, /* At the end of the latch block. */
175 IP_ORIGINAL /* The original biv. */
178 /* The induction variable candidate. */
179 struct iv_cand
181 unsigned id; /* The number of the candidate. */
182 bool important; /* Whether this is an "important" candidate, i.e. such
183 that it should be considered by all uses. */
184 enum iv_position pos; /* Where it is computed. */
185 tree incremented_at; /* For original biv, the statement where it is
186 incremented. */
187 tree var_before; /* The variable used for it before increment. */
188 tree var_after; /* The variable used for it after increment. */
189 struct iv *iv; /* The value of the candidate. NULL for
190 "pseudocandidate" used to indicate the possibility
191 to replace the final value of an iv by direct
192 computation of the value. */
193 unsigned cost; /* Cost of the candidate. */
194 bitmap depends_on; /* The list of invariants that are used in step of the
195 biv. */
198 /* The data used by the induction variable optimizations. */
200 typedef struct iv_use *iv_use_p;
201 DEF_VEC_P(iv_use_p);
202 DEF_VEC_ALLOC_P(iv_use_p,heap);
204 typedef struct iv_cand *iv_cand_p;
205 DEF_VEC_P(iv_cand_p);
206 DEF_VEC_ALLOC_P(iv_cand_p,heap);
208 struct ivopts_data
210 /* The currently optimized loop. */
211 struct loop *current_loop;
213 /* Numbers of iterations for all exits of the current loop. */
214 htab_t niters;
216 /* The size of version_info array allocated. */
217 unsigned version_info_size;
219 /* The array of information for the ssa names. */
220 struct version_info *version_info;
222 /* The bitmap of indices in version_info whose value was changed. */
223 bitmap relevant;
225 /* The maximum invariant id. */
226 unsigned max_inv_id;
228 /* The uses of induction variables. */
229 VEC(iv_use_p,heap) *iv_uses;
231 /* The candidates. */
232 VEC(iv_cand_p,heap) *iv_candidates;
234 /* A bitmap of important candidates. */
235 bitmap important_candidates;
237 /* Whether to consider just related and important candidates when replacing a
238 use. */
239 bool consider_all_candidates;
242 /* An assignment of iv candidates to uses. */
244 struct iv_ca
246 /* The number of uses covered by the assignment. */
247 unsigned upto;
249 /* Number of uses that cannot be expressed by the candidates in the set. */
250 unsigned bad_uses;
252 /* Candidate assigned to a use, together with the related costs. */
253 struct cost_pair **cand_for_use;
255 /* Number of times each candidate is used. */
256 unsigned *n_cand_uses;
258 /* The candidates used. */
259 bitmap cands;
261 /* The number of candidates in the set. */
262 unsigned n_cands;
264 /* Total number of registers needed. */
265 unsigned n_regs;
267 /* Total cost of expressing uses. */
268 unsigned cand_use_cost;
270 /* Total cost of candidates. */
271 unsigned cand_cost;
273 /* Number of times each invariant is used. */
274 unsigned *n_invariant_uses;
276 /* Total cost of the assignment. */
277 unsigned cost;
280 /* Difference of two iv candidate assignments. */
282 struct iv_ca_delta
284 /* Changed use. */
285 struct iv_use *use;
287 /* An old assignment (for rollback purposes). */
288 struct cost_pair *old_cp;
290 /* A new assignment. */
291 struct cost_pair *new_cp;
293 /* Next change in the list. */
294 struct iv_ca_delta *next_change;
297 /* Bound on number of candidates below that all candidates are considered. */
299 #define CONSIDER_ALL_CANDIDATES_BOUND \
300 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
302 /* If there are more iv occurrences, we just give up (it is quite unlikely that
303 optimizing such a loop would help, and it would take ages). */
305 #define MAX_CONSIDERED_USES \
306 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
308 /* If there are at most this number of ivs in the set, try removing unnecessary
309 ivs from the set always. */
311 #define ALWAYS_PRUNE_CAND_SET_BOUND \
312 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
314 /* The list of trees for that the decl_rtl field must be reset is stored
315 here. */
317 static VEC(tree,heap) *decl_rtl_to_reset;
319 /* Number of uses recorded in DATA. */
321 static inline unsigned
322 n_iv_uses (struct ivopts_data *data)
324 return VEC_length (iv_use_p, data->iv_uses);
327 /* Ith use recorded in DATA. */
329 static inline struct iv_use *
330 iv_use (struct ivopts_data *data, unsigned i)
332 return VEC_index (iv_use_p, data->iv_uses, i);
335 /* Number of candidates recorded in DATA. */
337 static inline unsigned
338 n_iv_cands (struct ivopts_data *data)
340 return VEC_length (iv_cand_p, data->iv_candidates);
343 /* Ith candidate recorded in DATA. */
345 static inline struct iv_cand *
346 iv_cand (struct ivopts_data *data, unsigned i)
348 return VEC_index (iv_cand_p, data->iv_candidates, i);
351 /* The data for LOOP. */
353 static inline struct loop_data *
354 loop_data (struct loop *loop)
356 return loop->aux;
359 /* The single loop exit if it dominates the latch, NULL otherwise. */
361 static edge
362 single_dom_exit (struct loop *loop)
364 edge exit = loop->single_exit;
366 if (!exit)
367 return NULL;
369 if (!just_once_each_iteration_p (loop, exit->src))
370 return NULL;
372 return exit;
375 /* Dumps information about the induction variable IV to FILE. */
377 extern void dump_iv (FILE *, struct iv *);
378 void
379 dump_iv (FILE *file, struct iv *iv)
381 if (iv->ssa_name)
383 fprintf (file, "ssa name ");
384 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
385 fprintf (file, "\n");
388 fprintf (file, " type ");
389 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
390 fprintf (file, "\n");
392 if (iv->step)
394 fprintf (file, " base ");
395 print_generic_expr (file, iv->base, TDF_SLIM);
396 fprintf (file, "\n");
398 fprintf (file, " step ");
399 print_generic_expr (file, iv->step, TDF_SLIM);
400 fprintf (file, "\n");
402 else
404 fprintf (file, " invariant ");
405 print_generic_expr (file, iv->base, TDF_SLIM);
406 fprintf (file, "\n");
409 if (iv->base_object)
411 fprintf (file, " base object ");
412 print_generic_expr (file, iv->base_object, TDF_SLIM);
413 fprintf (file, "\n");
416 if (iv->biv_p)
417 fprintf (file, " is a biv\n");
420 /* Dumps information about the USE to FILE. */
422 extern void dump_use (FILE *, struct iv_use *);
423 void
424 dump_use (FILE *file, struct iv_use *use)
426 fprintf (file, "use %d\n", use->id);
428 switch (use->type)
430 case USE_NONLINEAR_EXPR:
431 fprintf (file, " generic\n");
432 break;
434 case USE_OUTER:
435 fprintf (file, " outside\n");
436 break;
438 case USE_ADDRESS:
439 fprintf (file, " address\n");
440 break;
442 case USE_COMPARE:
443 fprintf (file, " compare\n");
444 break;
446 default:
447 gcc_unreachable ();
450 fprintf (file, " in statement ");
451 print_generic_expr (file, use->stmt, TDF_SLIM);
452 fprintf (file, "\n");
454 fprintf (file, " at position ");
455 if (use->op_p)
456 print_generic_expr (file, *use->op_p, TDF_SLIM);
457 fprintf (file, "\n");
459 dump_iv (file, use->iv);
461 if (use->related_cands)
463 fprintf (file, " related candidates ");
464 dump_bitmap (file, use->related_cands);
468 /* Dumps information about the uses to FILE. */
470 extern void dump_uses (FILE *, struct ivopts_data *);
471 void
472 dump_uses (FILE *file, struct ivopts_data *data)
474 unsigned i;
475 struct iv_use *use;
477 for (i = 0; i < n_iv_uses (data); i++)
479 use = iv_use (data, i);
481 dump_use (file, use);
482 fprintf (file, "\n");
486 /* Dumps information about induction variable candidate CAND to FILE. */
488 extern void dump_cand (FILE *, struct iv_cand *);
489 void
490 dump_cand (FILE *file, struct iv_cand *cand)
492 struct iv *iv = cand->iv;
494 fprintf (file, "candidate %d%s\n",
495 cand->id, cand->important ? " (important)" : "");
497 if (cand->depends_on)
499 fprintf (file, " depends on ");
500 dump_bitmap (file, cand->depends_on);
503 if (!iv)
505 fprintf (file, " final value replacement\n");
506 return;
509 switch (cand->pos)
511 case IP_NORMAL:
512 fprintf (file, " incremented before exit test\n");
513 break;
515 case IP_END:
516 fprintf (file, " incremented at end\n");
517 break;
519 case IP_ORIGINAL:
520 fprintf (file, " original biv\n");
521 break;
524 dump_iv (file, iv);
527 /* Returns the info for ssa version VER. */
529 static inline struct version_info *
530 ver_info (struct ivopts_data *data, unsigned ver)
532 return data->version_info + ver;
535 /* Returns the info for ssa name NAME. */
537 static inline struct version_info *
538 name_info (struct ivopts_data *data, tree name)
540 return ver_info (data, SSA_NAME_VERSION (name));
543 /* Checks whether there exists number X such that X * B = A, counting modulo
544 2^BITS. */
546 static bool
547 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
548 HOST_WIDE_INT *x)
550 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
551 unsigned HOST_WIDE_INT inv, ex, val;
552 unsigned i;
554 a &= mask;
555 b &= mask;
557 /* First divide the whole equation by 2 as long as possible. */
558 while (!(a & 1) && !(b & 1))
560 a >>= 1;
561 b >>= 1;
562 bits--;
563 mask >>= 1;
566 if (!(b & 1))
568 /* If b is still even, a is odd and there is no such x. */
569 return false;
572 /* Find the inverse of b. We compute it as
573 b^(2^(bits - 1) - 1) (mod 2^bits). */
574 inv = 1;
575 ex = b;
576 for (i = 0; i < bits - 1; i++)
578 inv = (inv * ex) & mask;
579 ex = (ex * ex) & mask;
582 val = (a * inv) & mask;
584 gcc_assert (((val * b) & mask) == a);
586 if ((val >> (bits - 1)) & 1)
587 val |= ~mask;
589 *x = val;
591 return true;
594 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
595 emitted in LOOP. */
597 static bool
598 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
600 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
602 gcc_assert (bb);
604 if (sbb == loop->latch)
605 return true;
607 if (sbb != bb)
608 return false;
610 return stmt == last_stmt (bb);
613 /* Returns true if STMT if after the place where the original induction
614 variable CAND is incremented. */
616 static bool
617 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
619 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
620 basic_block stmt_bb = bb_for_stmt (stmt);
621 block_stmt_iterator bsi;
623 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
624 return false;
626 if (stmt_bb != cand_bb)
627 return true;
629 /* Scan the block from the end, since the original ivs are usually
630 incremented at the end of the loop body. */
631 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
633 if (bsi_stmt (bsi) == cand->incremented_at)
634 return false;
635 if (bsi_stmt (bsi) == stmt)
636 return true;
640 /* Returns true if STMT if after the place where the induction variable
641 CAND is incremented in LOOP. */
643 static bool
644 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
646 switch (cand->pos)
648 case IP_END:
649 return false;
651 case IP_NORMAL:
652 return stmt_after_ip_normal_pos (loop, stmt);
654 case IP_ORIGINAL:
655 return stmt_after_ip_original_pos (cand, stmt);
657 default:
658 gcc_unreachable ();
662 /* Element of the table in that we cache the numbers of iterations obtained
663 from exits of the loop. */
665 struct nfe_cache_elt
667 /* The edge for that the number of iterations is cached. */
668 edge exit;
670 /* True if the # of iterations was successfully determined. */
671 bool valid_p;
673 /* Description of # of iterations. */
674 struct tree_niter_desc niter;
677 /* Hash function for nfe_cache_elt E. */
679 static hashval_t
680 nfe_hash (const void *e)
682 const struct nfe_cache_elt *elt = e;
684 return htab_hash_pointer (elt->exit);
687 /* Equality function for nfe_cache_elt E1 and edge E2. */
689 static int
690 nfe_eq (const void *e1, const void *e2)
692 const struct nfe_cache_elt *elt1 = e1;
694 return elt1->exit == e2;
697 /* Returns structure describing number of iterations determined from
698 EXIT of DATA->current_loop, or NULL if something goes wrong. */
700 static struct tree_niter_desc *
701 niter_for_exit (struct ivopts_data *data, edge exit)
703 struct nfe_cache_elt *nfe_desc;
704 PTR *slot;
706 slot = htab_find_slot_with_hash (data->niters, exit,
707 htab_hash_pointer (exit),
708 INSERT);
710 if (!*slot)
712 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
713 nfe_desc->exit = exit;
714 nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
715 exit, &nfe_desc->niter);
716 *slot = nfe_desc;
718 else
719 nfe_desc = *slot;
721 if (!nfe_desc->valid_p)
722 return NULL;
724 return &nfe_desc->niter;
727 /* Returns structure describing number of iterations determined from
728 single dominating exit of DATA->current_loop, or NULL if something
729 goes wrong. */
731 static struct tree_niter_desc *
732 niter_for_single_dom_exit (struct ivopts_data *data)
734 edge exit = single_dom_exit (data->current_loop);
736 if (!exit)
737 return NULL;
739 return niter_for_exit (data, exit);
742 /* Initializes data structures used by the iv optimization pass, stored
743 in DATA. LOOPS is the loop tree. */
745 static void
746 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
748 unsigned i;
750 data->version_info_size = 2 * num_ssa_names;
751 data->version_info = xcalloc (data->version_info_size,
752 sizeof (struct version_info));
753 data->relevant = BITMAP_ALLOC (NULL);
754 data->important_candidates = BITMAP_ALLOC (NULL);
755 data->max_inv_id = 0;
756 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
758 for (i = 1; i < loops->num; i++)
759 if (loops->parray[i])
760 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
762 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
763 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
764 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
767 /* Returns a memory object to that EXPR points. In case we are able to
768 determine that it does not point to any such object, NULL is returned. */
770 static tree
771 determine_base_object (tree expr)
773 enum tree_code code = TREE_CODE (expr);
774 tree base, obj, op0, op1;
776 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
777 return NULL_TREE;
779 switch (code)
781 case INTEGER_CST:
782 return NULL_TREE;
784 case ADDR_EXPR:
785 obj = TREE_OPERAND (expr, 0);
786 base = get_base_address (obj);
788 if (!base)
789 return expr;
791 if (TREE_CODE (base) == INDIRECT_REF)
792 return determine_base_object (TREE_OPERAND (base, 0));
794 return fold_convert (ptr_type_node,
795 build_fold_addr_expr (base));
797 case PLUS_EXPR:
798 case MINUS_EXPR:
799 op0 = determine_base_object (TREE_OPERAND (expr, 0));
800 op1 = determine_base_object (TREE_OPERAND (expr, 1));
802 if (!op1)
803 return op0;
805 if (!op0)
806 return (code == PLUS_EXPR
807 ? op1
808 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
810 return fold_build2 (code, ptr_type_node, op0, op1);
812 case NOP_EXPR:
813 case CONVERT_EXPR:
814 return determine_base_object (TREE_OPERAND (expr, 0));
816 default:
817 return fold_convert (ptr_type_node, expr);
821 /* Allocates an induction variable with given initial value BASE and step STEP
822 for loop LOOP. */
824 static struct iv *
825 alloc_iv (tree base, tree step)
827 struct iv *iv = xcalloc (1, sizeof (struct iv));
829 if (step && integer_zerop (step))
830 step = NULL_TREE;
832 iv->base = base;
833 iv->base_object = determine_base_object (base);
834 iv->step = step;
835 iv->biv_p = false;
836 iv->have_use_for = false;
837 iv->use_id = 0;
838 iv->ssa_name = NULL_TREE;
840 return iv;
843 /* Sets STEP and BASE for induction variable IV. */
845 static void
846 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
848 struct version_info *info = name_info (data, iv);
850 gcc_assert (!info->iv);
852 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
853 info->iv = alloc_iv (base, step);
854 info->iv->ssa_name = iv;
857 /* Finds induction variable declaration for VAR. */
859 static struct iv *
860 get_iv (struct ivopts_data *data, tree var)
862 basic_block bb;
864 if (!name_info (data, var)->iv)
866 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
868 if (!bb
869 || !flow_bb_inside_loop_p (data->current_loop, bb))
870 set_iv (data, var, var, NULL_TREE);
873 return name_info (data, var)->iv;
876 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
877 not define a simple affine biv with nonzero step. */
879 static tree
880 determine_biv_step (tree phi)
882 struct loop *loop = bb_for_stmt (phi)->loop_father;
883 tree name = PHI_RESULT (phi), base, step;
885 if (!is_gimple_reg (name))
886 return NULL_TREE;
888 if (!simple_iv (loop, phi, name, &base, &step, true))
889 return NULL_TREE;
891 if (zero_p (step))
892 return NULL_TREE;
894 return step;
897 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
899 static bool
900 abnormal_ssa_name_p (tree exp)
902 if (!exp)
903 return false;
905 if (TREE_CODE (exp) != SSA_NAME)
906 return false;
908 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
911 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
912 abnormal phi node. Callback for for_each_index. */
914 static bool
915 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
916 void *data ATTRIBUTE_UNUSED)
918 if (TREE_CODE (base) == ARRAY_REF)
920 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
921 return false;
922 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
923 return false;
926 return !abnormal_ssa_name_p (*index);
929 /* Returns true if EXPR contains a ssa name that occurs in an
930 abnormal phi node. */
932 static bool
933 contains_abnormal_ssa_name_p (tree expr)
935 enum tree_code code;
936 enum tree_code_class class;
938 if (!expr)
939 return false;
941 code = TREE_CODE (expr);
942 class = TREE_CODE_CLASS (code);
944 if (code == SSA_NAME)
945 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
947 if (code == INTEGER_CST
948 || is_gimple_min_invariant (expr))
949 return false;
951 if (code == ADDR_EXPR)
952 return !for_each_index (&TREE_OPERAND (expr, 0),
953 idx_contains_abnormal_ssa_name_p,
954 NULL);
956 switch (class)
958 case tcc_binary:
959 case tcc_comparison:
960 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
961 return true;
963 /* Fallthru. */
964 case tcc_unary:
965 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
966 return true;
968 break;
970 default:
971 gcc_unreachable ();
974 return false;
977 /* Finds basic ivs. */
979 static bool
980 find_bivs (struct ivopts_data *data)
982 tree phi, step, type, base;
983 bool found = false;
984 struct loop *loop = data->current_loop;
986 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
988 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
989 continue;
991 step = determine_biv_step (phi);
992 if (!step)
993 continue;
995 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
996 base = expand_simple_operations (base);
997 if (contains_abnormal_ssa_name_p (base)
998 || contains_abnormal_ssa_name_p (step))
999 continue;
1001 type = TREE_TYPE (PHI_RESULT (phi));
1002 base = fold_convert (type, base);
1003 if (step)
1004 step = fold_convert (type, step);
1006 set_iv (data, PHI_RESULT (phi), base, step);
1007 found = true;
1010 return found;
1013 /* Marks basic ivs. */
1015 static void
1016 mark_bivs (struct ivopts_data *data)
1018 tree phi, var;
1019 struct iv *iv, *incr_iv;
1020 struct loop *loop = data->current_loop;
1021 basic_block incr_bb;
1023 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1025 iv = get_iv (data, PHI_RESULT (phi));
1026 if (!iv)
1027 continue;
1029 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1030 incr_iv = get_iv (data, var);
1031 if (!incr_iv)
1032 continue;
1034 /* If the increment is in the subloop, ignore it. */
1035 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1036 if (incr_bb->loop_father != data->current_loop
1037 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1038 continue;
1040 iv->biv_p = true;
1041 incr_iv->biv_p = true;
1045 /* Checks whether STMT defines a linear induction variable and stores its
1046 parameters to BASE and STEP. */
1048 static bool
1049 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1050 tree *base, tree *step)
1052 tree lhs;
1053 struct loop *loop = data->current_loop;
1055 *base = NULL_TREE;
1056 *step = NULL_TREE;
1058 if (TREE_CODE (stmt) != MODIFY_EXPR)
1059 return false;
1061 lhs = TREE_OPERAND (stmt, 0);
1062 if (TREE_CODE (lhs) != SSA_NAME)
1063 return false;
1065 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step, true))
1066 return false;
1067 *base = expand_simple_operations (*base);
1069 if (contains_abnormal_ssa_name_p (*base)
1070 || contains_abnormal_ssa_name_p (*step))
1071 return false;
1073 return true;
1076 /* Finds general ivs in statement STMT. */
1078 static void
1079 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1081 tree base, step;
1083 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1084 return;
1086 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1089 /* Finds general ivs in basic block BB. */
1091 static void
1092 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1094 block_stmt_iterator bsi;
1096 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1097 find_givs_in_stmt (data, bsi_stmt (bsi));
1100 /* Finds general ivs. */
1102 static void
1103 find_givs (struct ivopts_data *data)
1105 struct loop *loop = data->current_loop;
1106 basic_block *body = get_loop_body_in_dom_order (loop);
1107 unsigned i;
1109 for (i = 0; i < loop->num_nodes; i++)
1110 find_givs_in_bb (data, body[i]);
1111 free (body);
1114 /* For each ssa name defined in LOOP determines whether it is an induction
1115 variable and if so, its initial value and step. */
1117 static bool
1118 find_induction_variables (struct ivopts_data *data)
1120 unsigned i;
1121 bitmap_iterator bi;
1123 if (!find_bivs (data))
1124 return false;
1126 find_givs (data);
1127 mark_bivs (data);
1129 if (dump_file && (dump_flags & TDF_DETAILS))
1131 struct tree_niter_desc *niter;
1133 niter = niter_for_single_dom_exit (data);
1135 if (niter)
1137 fprintf (dump_file, " number of iterations ");
1138 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1139 fprintf (dump_file, "\n");
1141 fprintf (dump_file, " may be zero if ");
1142 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1143 fprintf (dump_file, "\n");
1144 fprintf (dump_file, "\n");
1147 fprintf (dump_file, "Induction variables:\n\n");
1149 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1151 if (ver_info (data, i)->iv)
1152 dump_iv (dump_file, ver_info (data, i)->iv);
1156 return true;
1159 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1161 static struct iv_use *
1162 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1163 tree stmt, enum use_type use_type)
1165 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1167 use->id = n_iv_uses (data);
1168 use->type = use_type;
1169 use->iv = iv;
1170 use->stmt = stmt;
1171 use->op_p = use_p;
1172 use->related_cands = BITMAP_ALLOC (NULL);
1174 /* To avoid showing ssa name in the dumps, if it was not reset by the
1175 caller. */
1176 iv->ssa_name = NULL_TREE;
1178 if (dump_file && (dump_flags & TDF_DETAILS))
1179 dump_use (dump_file, use);
1181 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1183 return use;
1186 /* Checks whether OP is a loop-level invariant and if so, records it.
1187 NONLINEAR_USE is true if the invariant is used in a way we do not
1188 handle specially. */
1190 static void
1191 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1193 basic_block bb;
1194 struct version_info *info;
1196 if (TREE_CODE (op) != SSA_NAME
1197 || !is_gimple_reg (op))
1198 return;
1200 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1201 if (bb
1202 && flow_bb_inside_loop_p (data->current_loop, bb))
1203 return;
1205 info = name_info (data, op);
1206 info->name = op;
1207 info->has_nonlin_use |= nonlinear_use;
1208 if (!info->inv_id)
1209 info->inv_id = ++data->max_inv_id;
1210 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1213 /* Checks whether the use OP is interesting and if so, records it
1214 as TYPE. */
1216 static struct iv_use *
1217 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1218 enum use_type type)
1220 struct iv *iv;
1221 struct iv *civ;
1222 tree stmt;
1223 struct iv_use *use;
1225 if (TREE_CODE (op) != SSA_NAME)
1226 return NULL;
1228 iv = get_iv (data, op);
1229 if (!iv)
1230 return NULL;
1232 if (iv->have_use_for)
1234 use = iv_use (data, iv->use_id);
1236 gcc_assert (use->type == USE_NONLINEAR_EXPR
1237 || use->type == USE_OUTER);
1239 if (type == USE_NONLINEAR_EXPR)
1240 use->type = USE_NONLINEAR_EXPR;
1241 return use;
1244 if (zero_p (iv->step))
1246 record_invariant (data, op, true);
1247 return NULL;
1249 iv->have_use_for = true;
1251 civ = xmalloc (sizeof (struct iv));
1252 *civ = *iv;
1254 stmt = SSA_NAME_DEF_STMT (op);
1255 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1256 || TREE_CODE (stmt) == MODIFY_EXPR);
1258 use = record_use (data, NULL, civ, stmt, type);
1259 iv->use_id = use->id;
1261 return use;
1264 /* Checks whether the use OP is interesting and if so, records it. */
1266 static struct iv_use *
1267 find_interesting_uses_op (struct ivopts_data *data, tree op)
1269 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1272 /* Records a definition of induction variable OP that is used outside of the
1273 loop. */
1275 static struct iv_use *
1276 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1278 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1281 /* Checks whether the condition *COND_P in STMT is interesting
1282 and if so, records it. */
1284 static void
1285 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1287 tree *op0_p;
1288 tree *op1_p;
1289 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1290 struct iv const_iv;
1291 tree zero = integer_zero_node;
1293 const_iv.step = NULL_TREE;
1295 if (TREE_CODE (*cond_p) != SSA_NAME
1296 && !COMPARISON_CLASS_P (*cond_p))
1297 return;
1299 if (TREE_CODE (*cond_p) == SSA_NAME)
1301 op0_p = cond_p;
1302 op1_p = &zero;
1304 else
1306 op0_p = &TREE_OPERAND (*cond_p, 0);
1307 op1_p = &TREE_OPERAND (*cond_p, 1);
1310 if (TREE_CODE (*op0_p) == SSA_NAME)
1311 iv0 = get_iv (data, *op0_p);
1312 else
1313 iv0 = &const_iv;
1315 if (TREE_CODE (*op1_p) == SSA_NAME)
1316 iv1 = get_iv (data, *op1_p);
1317 else
1318 iv1 = &const_iv;
1320 if (/* When comparing with non-invariant value, we may not do any senseful
1321 induction variable elimination. */
1322 (!iv0 || !iv1)
1323 /* Eliminating condition based on two ivs would be nontrivial.
1324 ??? TODO -- it is not really important to handle this case. */
1325 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1327 find_interesting_uses_op (data, *op0_p);
1328 find_interesting_uses_op (data, *op1_p);
1329 return;
1332 if (zero_p (iv0->step) && zero_p (iv1->step))
1334 /* If both are invariants, this is a work for unswitching. */
1335 return;
1338 civ = xmalloc (sizeof (struct iv));
1339 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1340 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1343 /* Returns true if expression EXPR is obviously invariant in LOOP,
1344 i.e. if all its operands are defined outside of the LOOP. */
1346 bool
1347 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1349 basic_block def_bb;
1350 unsigned i, len;
1352 if (is_gimple_min_invariant (expr))
1353 return true;
1355 if (TREE_CODE (expr) == SSA_NAME)
1357 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1358 if (def_bb
1359 && flow_bb_inside_loop_p (loop, def_bb))
1360 return false;
1362 return true;
1365 if (!EXPR_P (expr))
1366 return false;
1368 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1369 for (i = 0; i < len; i++)
1370 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1371 return false;
1373 return true;
1376 /* Cumulates the steps of indices into DATA and replaces their values with the
1377 initial ones. Returns false when the value of the index cannot be determined.
1378 Callback for for_each_index. */
1380 struct ifs_ivopts_data
1382 struct ivopts_data *ivopts_data;
1383 tree stmt;
1384 tree *step_p;
1387 static bool
1388 idx_find_step (tree base, tree *idx, void *data)
1390 struct ifs_ivopts_data *dta = data;
1391 struct iv *iv;
1392 tree step, iv_step, lbound, off;
1393 struct loop *loop = dta->ivopts_data->current_loop;
1395 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1396 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1397 return false;
1399 /* If base is a component ref, require that the offset of the reference
1400 be invariant. */
1401 if (TREE_CODE (base) == COMPONENT_REF)
1403 off = component_ref_field_offset (base);
1404 return expr_invariant_in_loop_p (loop, off);
1407 /* If base is array, first check whether we will be able to move the
1408 reference out of the loop (in order to take its address in strength
1409 reduction). In order for this to work we need both lower bound
1410 and step to be loop invariants. */
1411 if (TREE_CODE (base) == ARRAY_REF)
1413 step = array_ref_element_size (base);
1414 lbound = array_ref_low_bound (base);
1416 if (!expr_invariant_in_loop_p (loop, step)
1417 || !expr_invariant_in_loop_p (loop, lbound))
1418 return false;
1421 if (TREE_CODE (*idx) != SSA_NAME)
1422 return true;
1424 iv = get_iv (dta->ivopts_data, *idx);
1425 if (!iv)
1426 return false;
1428 *idx = iv->base;
1430 if (!iv->step)
1431 return true;
1433 if (TREE_CODE (base) == ARRAY_REF)
1435 step = array_ref_element_size (base);
1437 /* We only handle addresses whose step is an integer constant. */
1438 if (TREE_CODE (step) != INTEGER_CST)
1439 return false;
1441 else
1442 /* The step for pointer arithmetics already is 1 byte. */
1443 step = build_int_cst (sizetype, 1);
1445 iv_step = convert_step (dta->ivopts_data->current_loop,
1446 sizetype, iv->base, iv->step, dta->stmt);
1448 if (!iv_step)
1450 /* The index might wrap. */
1451 return false;
1454 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1456 if (!*dta->step_p)
1457 *dta->step_p = step;
1458 else
1459 *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1461 return true;
1464 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1465 object is passed to it in DATA. */
1467 static bool
1468 idx_record_use (tree base, tree *idx,
1469 void *data)
1471 find_interesting_uses_op (data, *idx);
1472 if (TREE_CODE (base) == ARRAY_REF)
1474 find_interesting_uses_op (data, array_ref_element_size (base));
1475 find_interesting_uses_op (data, array_ref_low_bound (base));
1477 return true;
1480 /* Returns true if memory reference REF may be unaligned. */
1482 static bool
1483 may_be_unaligned_p (tree ref)
1485 tree base;
1486 tree base_type;
1487 HOST_WIDE_INT bitsize;
1488 HOST_WIDE_INT bitpos;
1489 tree toffset;
1490 enum machine_mode mode;
1491 int unsignedp, volatilep;
1492 unsigned base_align;
1494 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1495 thus they are not misaligned. */
1496 if (TREE_CODE (ref) == TARGET_MEM_REF)
1497 return false;
1499 /* The test below is basically copy of what expr.c:normal_inner_ref
1500 does to check whether the object must be loaded by parts when
1501 STRICT_ALIGNMENT is true. */
1502 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1503 &unsignedp, &volatilep, true);
1504 base_type = TREE_TYPE (base);
1505 base_align = TYPE_ALIGN (base_type);
1507 if (mode != BLKmode
1508 && (base_align < GET_MODE_ALIGNMENT (mode)
1509 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1510 || bitpos % BITS_PER_UNIT != 0))
1511 return true;
1513 return false;
1516 /* Finds addresses in *OP_P inside STMT. */
1518 static void
1519 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1521 tree base = *op_p, step = NULL;
1522 struct iv *civ;
1523 struct ifs_ivopts_data ifs_ivopts_data;
1525 /* Do not play with volatile memory references. A bit too conservative,
1526 perhaps, but safe. */
1527 if (stmt_ann (stmt)->has_volatile_ops)
1528 goto fail;
1530 /* Ignore bitfields for now. Not really something terribly complicated
1531 to handle. TODO. */
1532 if (TREE_CODE (base) == COMPONENT_REF
1533 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1534 goto fail;
1536 if (STRICT_ALIGNMENT
1537 && may_be_unaligned_p (base))
1538 goto fail;
1540 base = unshare_expr (base);
1542 if (TREE_CODE (base) == TARGET_MEM_REF)
1544 tree type = build_pointer_type (TREE_TYPE (base));
1545 tree astep;
1547 if (TMR_BASE (base)
1548 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1550 civ = get_iv (data, TMR_BASE (base));
1551 if (!civ)
1552 goto fail;
1554 TMR_BASE (base) = civ->base;
1555 step = civ->step;
1557 if (TMR_INDEX (base)
1558 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1560 civ = get_iv (data, TMR_INDEX (base));
1561 if (!civ)
1562 goto fail;
1564 TMR_INDEX (base) = civ->base;
1565 astep = civ->step;
1567 if (astep)
1569 if (TMR_STEP (base))
1570 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1572 if (step)
1573 step = fold_build2 (PLUS_EXPR, type, step, astep);
1574 else
1575 step = astep;
1579 if (zero_p (step))
1580 goto fail;
1581 base = tree_mem_ref_addr (type, base);
1583 else
1585 ifs_ivopts_data.ivopts_data = data;
1586 ifs_ivopts_data.stmt = stmt;
1587 ifs_ivopts_data.step_p = &step;
1588 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1589 || zero_p (step))
1590 goto fail;
1592 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1593 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1595 base = build_fold_addr_expr (base);
1598 civ = alloc_iv (base, step);
1599 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1600 return;
1602 fail:
1603 for_each_index (op_p, idx_record_use, data);
1606 /* Finds and records invariants used in STMT. */
1608 static void
1609 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1611 ssa_op_iter iter;
1612 use_operand_p use_p;
1613 tree op;
1615 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1617 op = USE_FROM_PTR (use_p);
1618 record_invariant (data, op, false);
1622 /* Finds interesting uses of induction variables in the statement STMT. */
1624 static void
1625 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1627 struct iv *iv;
1628 tree op, lhs, rhs;
1629 ssa_op_iter iter;
1630 use_operand_p use_p;
1632 find_invariants_stmt (data, stmt);
1634 if (TREE_CODE (stmt) == COND_EXPR)
1636 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1637 return;
1640 if (TREE_CODE (stmt) == MODIFY_EXPR)
1642 lhs = TREE_OPERAND (stmt, 0);
1643 rhs = TREE_OPERAND (stmt, 1);
1645 if (TREE_CODE (lhs) == SSA_NAME)
1647 /* If the statement defines an induction variable, the uses are not
1648 interesting by themselves. */
1650 iv = get_iv (data, lhs);
1652 if (iv && !zero_p (iv->step))
1653 return;
1656 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1658 case tcc_comparison:
1659 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1660 return;
1662 case tcc_reference:
1663 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1664 if (REFERENCE_CLASS_P (lhs))
1665 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1666 return;
1668 default: ;
1671 if (REFERENCE_CLASS_P (lhs)
1672 && is_gimple_val (rhs))
1674 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1675 find_interesting_uses_op (data, rhs);
1676 return;
1679 /* TODO -- we should also handle address uses of type
1681 memory = call (whatever);
1685 call (memory). */
1688 if (TREE_CODE (stmt) == PHI_NODE
1689 && bb_for_stmt (stmt) == data->current_loop->header)
1691 lhs = PHI_RESULT (stmt);
1692 iv = get_iv (data, lhs);
1694 if (iv && !zero_p (iv->step))
1695 return;
1698 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1700 op = USE_FROM_PTR (use_p);
1702 if (TREE_CODE (op) != SSA_NAME)
1703 continue;
1705 iv = get_iv (data, op);
1706 if (!iv)
1707 continue;
1709 find_interesting_uses_op (data, op);
1713 /* Finds interesting uses of induction variables outside of loops
1714 on loop exit edge EXIT. */
1716 static void
1717 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1719 tree phi, def;
1721 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1723 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1724 find_interesting_uses_outer (data, def);
1728 /* Finds uses of the induction variables that are interesting. */
1730 static void
1731 find_interesting_uses (struct ivopts_data *data)
1733 basic_block bb;
1734 block_stmt_iterator bsi;
1735 tree phi;
1736 basic_block *body = get_loop_body (data->current_loop);
1737 unsigned i;
1738 struct version_info *info;
1739 edge e;
1741 if (dump_file && (dump_flags & TDF_DETAILS))
1742 fprintf (dump_file, "Uses:\n\n");
1744 for (i = 0; i < data->current_loop->num_nodes; i++)
1746 edge_iterator ei;
1747 bb = body[i];
1749 FOR_EACH_EDGE (e, ei, bb->succs)
1750 if (e->dest != EXIT_BLOCK_PTR
1751 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1752 find_interesting_uses_outside (data, e);
1754 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1755 find_interesting_uses_stmt (data, phi);
1756 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1757 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1760 if (dump_file && (dump_flags & TDF_DETAILS))
1762 bitmap_iterator bi;
1764 fprintf (dump_file, "\n");
1766 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1768 info = ver_info (data, i);
1769 if (info->inv_id)
1771 fprintf (dump_file, " ");
1772 print_generic_expr (dump_file, info->name, TDF_SLIM);
1773 fprintf (dump_file, " is invariant (%d)%s\n",
1774 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1778 fprintf (dump_file, "\n");
1781 free (body);
1784 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1785 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1786 we are at the top-level of the processed address. */
1788 static tree
1789 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1790 unsigned HOST_WIDE_INT *offset)
1792 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1793 enum tree_code code;
1794 tree type, orig_type = TREE_TYPE (expr);
1795 unsigned HOST_WIDE_INT off0, off1, st;
1796 tree orig_expr = expr;
1798 STRIP_NOPS (expr);
1800 type = TREE_TYPE (expr);
1801 code = TREE_CODE (expr);
1802 *offset = 0;
1804 switch (code)
1806 case INTEGER_CST:
1807 if (!cst_and_fits_in_hwi (expr)
1808 || zero_p (expr))
1809 return orig_expr;
1811 *offset = int_cst_value (expr);
1812 return build_int_cst_type (orig_type, 0);
1814 case PLUS_EXPR:
1815 case MINUS_EXPR:
1816 op0 = TREE_OPERAND (expr, 0);
1817 op1 = TREE_OPERAND (expr, 1);
1819 op0 = strip_offset_1 (op0, false, false, &off0);
1820 op1 = strip_offset_1 (op1, false, false, &off1);
1822 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1823 if (op0 == TREE_OPERAND (expr, 0)
1824 && op1 == TREE_OPERAND (expr, 1))
1825 return orig_expr;
1827 if (zero_p (op1))
1828 expr = op0;
1829 else if (zero_p (op0))
1831 if (code == PLUS_EXPR)
1832 expr = op1;
1833 else
1834 expr = fold_build1 (NEGATE_EXPR, type, op1);
1836 else
1837 expr = fold_build2 (code, type, op0, op1);
1839 return fold_convert (orig_type, expr);
1841 case ARRAY_REF:
1842 if (!inside_addr)
1843 return orig_expr;
1845 step = array_ref_element_size (expr);
1846 if (!cst_and_fits_in_hwi (step))
1847 break;
1849 st = int_cst_value (step);
1850 op1 = TREE_OPERAND (expr, 1);
1851 op1 = strip_offset_1 (op1, false, false, &off1);
1852 *offset = off1 * st;
1854 if (top_compref
1855 && zero_p (op1))
1857 /* Strip the component reference completely. */
1858 op0 = TREE_OPERAND (expr, 0);
1859 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1860 *offset += off0;
1861 return op0;
1863 break;
1865 case COMPONENT_REF:
1866 if (!inside_addr)
1867 return orig_expr;
1869 tmp = component_ref_field_offset (expr);
1870 if (top_compref
1871 && cst_and_fits_in_hwi (tmp))
1873 /* Strip the component reference completely. */
1874 op0 = TREE_OPERAND (expr, 0);
1875 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1876 *offset = off0 + int_cst_value (tmp);
1877 return op0;
1879 break;
1881 case ADDR_EXPR:
1882 op0 = TREE_OPERAND (expr, 0);
1883 op0 = strip_offset_1 (op0, true, true, &off0);
1884 *offset += off0;
1886 if (op0 == TREE_OPERAND (expr, 0))
1887 return orig_expr;
1889 expr = build_fold_addr_expr (op0);
1890 return fold_convert (orig_type, expr);
1892 case INDIRECT_REF:
1893 inside_addr = false;
1894 break;
1896 default:
1897 return orig_expr;
1900 /* Default handling of expressions for that we want to recurse into
1901 the first operand. */
1902 op0 = TREE_OPERAND (expr, 0);
1903 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1904 *offset += off0;
1906 if (op0 == TREE_OPERAND (expr, 0)
1907 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1908 return orig_expr;
1910 expr = copy_node (expr);
1911 TREE_OPERAND (expr, 0) = op0;
1912 if (op1)
1913 TREE_OPERAND (expr, 1) = op1;
1915 /* Inside address, we might strip the top level component references,
1916 thus changing type of the expression. Handling of ADDR_EXPR
1917 will fix that. */
1918 expr = fold_convert (orig_type, expr);
1920 return expr;
1923 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1925 static tree
1926 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1928 return strip_offset_1 (expr, false, false, offset);
1931 /* Returns variant of TYPE that can be used as base for different uses.
1932 For integer types, we return unsigned variant of the type, which
1933 avoids problems with overflows. For pointer types, we return void *. */
1935 static tree
1936 generic_type_for (tree type)
1938 if (POINTER_TYPE_P (type))
1939 return ptr_type_node;
1941 if (TYPE_UNSIGNED (type))
1942 return type;
1944 return unsigned_type_for (type);
1947 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1948 the bitmap to that we should store it. */
1950 static struct ivopts_data *fd_ivopts_data;
1951 static tree
1952 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1954 bitmap *depends_on = data;
1955 struct version_info *info;
1957 if (TREE_CODE (*expr_p) != SSA_NAME)
1958 return NULL_TREE;
1959 info = name_info (fd_ivopts_data, *expr_p);
1961 if (!info->inv_id || info->has_nonlin_use)
1962 return NULL_TREE;
1964 if (!*depends_on)
1965 *depends_on = BITMAP_ALLOC (NULL);
1966 bitmap_set_bit (*depends_on, info->inv_id);
1968 return NULL_TREE;
1971 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1972 position to POS. If USE is not NULL, the candidate is set as related to
1973 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1974 replacement of the final value of the iv by a direct computation. */
1976 static struct iv_cand *
1977 add_candidate_1 (struct ivopts_data *data,
1978 tree base, tree step, bool important, enum iv_position pos,
1979 struct iv_use *use, tree incremented_at)
1981 unsigned i;
1982 struct iv_cand *cand = NULL;
1983 tree type, orig_type;
1985 if (base)
1987 orig_type = TREE_TYPE (base);
1988 type = generic_type_for (orig_type);
1989 if (type != orig_type)
1991 base = fold_convert (type, base);
1992 if (step)
1993 step = fold_convert (type, step);
1997 for (i = 0; i < n_iv_cands (data); i++)
1999 cand = iv_cand (data, i);
2001 if (cand->pos != pos)
2002 continue;
2004 if (cand->incremented_at != incremented_at)
2005 continue;
2007 if (!cand->iv)
2009 if (!base && !step)
2010 break;
2012 continue;
2015 if (!base && !step)
2016 continue;
2018 if (!operand_equal_p (base, cand->iv->base, 0))
2019 continue;
2021 if (zero_p (cand->iv->step))
2023 if (zero_p (step))
2024 break;
2026 else
2028 if (step && operand_equal_p (step, cand->iv->step, 0))
2029 break;
2033 if (i == n_iv_cands (data))
2035 cand = xcalloc (1, sizeof (struct iv_cand));
2036 cand->id = i;
2038 if (!base && !step)
2039 cand->iv = NULL;
2040 else
2041 cand->iv = alloc_iv (base, step);
2043 cand->pos = pos;
2044 if (pos != IP_ORIGINAL && cand->iv)
2046 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2047 cand->var_after = cand->var_before;
2049 cand->important = important;
2050 cand->incremented_at = incremented_at;
2051 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2053 if (step
2054 && TREE_CODE (step) != INTEGER_CST)
2056 fd_ivopts_data = data;
2057 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2060 if (dump_file && (dump_flags & TDF_DETAILS))
2061 dump_cand (dump_file, cand);
2064 if (important && !cand->important)
2066 cand->important = true;
2067 if (dump_file && (dump_flags & TDF_DETAILS))
2068 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2071 if (use)
2073 bitmap_set_bit (use->related_cands, i);
2074 if (dump_file && (dump_flags & TDF_DETAILS))
2075 fprintf (dump_file, "Candidate %d is related to use %d\n",
2076 cand->id, use->id);
2079 return cand;
2082 /* Returns true if incrementing the induction variable at the end of the LOOP
2083 is allowed.
2085 The purpose is to avoid splitting latch edge with a biv increment, thus
2086 creating a jump, possibly confusing other optimization passes and leaving
2087 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2088 is not available (so we do not have a better alternative), or if the latch
2089 edge is already nonempty. */
2091 static bool
2092 allow_ip_end_pos_p (struct loop *loop)
2094 if (!ip_normal_pos (loop))
2095 return true;
2097 if (!empty_block_p (ip_end_pos (loop)))
2098 return true;
2100 return false;
2103 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2104 position to POS. If USE is not NULL, the candidate is set as related to
2105 it. The candidate computation is scheduled on all available positions. */
2107 static void
2108 add_candidate (struct ivopts_data *data,
2109 tree base, tree step, bool important, struct iv_use *use)
2111 if (ip_normal_pos (data->current_loop))
2112 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2113 if (ip_end_pos (data->current_loop)
2114 && allow_ip_end_pos_p (data->current_loop))
2115 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2118 /* Add a standard "0 + 1 * iteration" iv candidate for a
2119 type with SIZE bits. */
2121 static void
2122 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2123 unsigned int size)
2125 tree type = lang_hooks.types.type_for_size (size, true);
2126 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2127 true, NULL);
2130 /* Adds standard iv candidates. */
2132 static void
2133 add_standard_iv_candidates (struct ivopts_data *data)
2135 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2137 /* The same for a double-integer type if it is still fast enough. */
2138 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2139 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2143 /* Adds candidates bases on the old induction variable IV. */
2145 static void
2146 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2148 tree phi, def;
2149 struct iv_cand *cand;
2151 add_candidate (data, iv->base, iv->step, true, NULL);
2153 /* The same, but with initial value zero. */
2154 add_candidate (data,
2155 build_int_cst (TREE_TYPE (iv->base), 0),
2156 iv->step, true, NULL);
2158 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2159 if (TREE_CODE (phi) == PHI_NODE)
2161 /* Additionally record the possibility of leaving the original iv
2162 untouched. */
2163 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2164 cand = add_candidate_1 (data,
2165 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2166 SSA_NAME_DEF_STMT (def));
2167 cand->var_before = iv->ssa_name;
2168 cand->var_after = def;
2172 /* Adds candidates based on the old induction variables. */
2174 static void
2175 add_old_ivs_candidates (struct ivopts_data *data)
2177 unsigned i;
2178 struct iv *iv;
2179 bitmap_iterator bi;
2181 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2183 iv = ver_info (data, i)->iv;
2184 if (iv && iv->biv_p && !zero_p (iv->step))
2185 add_old_iv_candidates (data, iv);
2189 /* Adds candidates based on the value of the induction variable IV and USE. */
2191 static void
2192 add_iv_value_candidates (struct ivopts_data *data,
2193 struct iv *iv, struct iv_use *use)
2195 unsigned HOST_WIDE_INT offset;
2196 tree base;
2198 add_candidate (data, iv->base, iv->step, false, use);
2200 /* The same, but with initial value zero. Make such variable important,
2201 since it is generic enough so that possibly many uses may be based
2202 on it. */
2203 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2204 iv->step, true, use);
2206 /* Third, try removing the constant offset. */
2207 base = strip_offset (iv->base, &offset);
2208 if (offset)
2209 add_candidate (data, base, iv->step, false, use);
2212 /* Possibly adds pseudocandidate for replacing the final value of USE by
2213 a direct computation. */
2215 static void
2216 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2218 struct tree_niter_desc *niter;
2220 /* We must know where we exit the loop and how many times does it roll. */
2221 niter = niter_for_single_dom_exit (data);
2222 if (!niter
2223 || !zero_p (niter->may_be_zero))
2224 return;
2226 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2229 /* Adds candidates based on the uses. */
2231 static void
2232 add_derived_ivs_candidates (struct ivopts_data *data)
2234 unsigned i;
2236 for (i = 0; i < n_iv_uses (data); i++)
2238 struct iv_use *use = iv_use (data, i);
2240 if (!use)
2241 continue;
2243 switch (use->type)
2245 case USE_NONLINEAR_EXPR:
2246 case USE_COMPARE:
2247 case USE_ADDRESS:
2248 /* Just add the ivs based on the value of the iv used here. */
2249 add_iv_value_candidates (data, use->iv, use);
2250 break;
2252 case USE_OUTER:
2253 add_iv_value_candidates (data, use->iv, use);
2255 /* Additionally, add the pseudocandidate for the possibility to
2256 replace the final value by a direct computation. */
2257 add_iv_outer_candidates (data, use);
2258 break;
2260 default:
2261 gcc_unreachable ();
2266 /* Record important candidates and add them to related_cands bitmaps
2267 if needed. */
2269 static void
2270 record_important_candidates (struct ivopts_data *data)
2272 unsigned i;
2273 struct iv_use *use;
2275 for (i = 0; i < n_iv_cands (data); i++)
2277 struct iv_cand *cand = iv_cand (data, i);
2279 if (cand->important)
2280 bitmap_set_bit (data->important_candidates, i);
2283 data->consider_all_candidates = (n_iv_cands (data)
2284 <= CONSIDER_ALL_CANDIDATES_BOUND);
2286 if (data->consider_all_candidates)
2288 /* We will not need "related_cands" bitmaps in this case,
2289 so release them to decrease peak memory consumption. */
2290 for (i = 0; i < n_iv_uses (data); i++)
2292 use = iv_use (data, i);
2293 BITMAP_FREE (use->related_cands);
2296 else
2298 /* Add important candidates to the related_cands bitmaps. */
2299 for (i = 0; i < n_iv_uses (data); i++)
2300 bitmap_ior_into (iv_use (data, i)->related_cands,
2301 data->important_candidates);
2305 /* Finds the candidates for the induction variables. */
2307 static void
2308 find_iv_candidates (struct ivopts_data *data)
2310 /* Add commonly used ivs. */
2311 add_standard_iv_candidates (data);
2313 /* Add old induction variables. */
2314 add_old_ivs_candidates (data);
2316 /* Add induction variables derived from uses. */
2317 add_derived_ivs_candidates (data);
2319 /* Record the important candidates. */
2320 record_important_candidates (data);
2323 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2324 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2325 we allocate a simple list to every use. */
2327 static void
2328 alloc_use_cost_map (struct ivopts_data *data)
2330 unsigned i, size, s, j;
2332 for (i = 0; i < n_iv_uses (data); i++)
2334 struct iv_use *use = iv_use (data, i);
2335 bitmap_iterator bi;
2337 if (data->consider_all_candidates)
2338 size = n_iv_cands (data);
2339 else
2341 s = 0;
2342 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2344 s++;
2347 /* Round up to the power of two, so that moduling by it is fast. */
2348 for (size = 1; size < s; size <<= 1)
2349 continue;
2352 use->n_map_members = size;
2353 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2357 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2358 on invariants DEPENDS_ON and that the value used in expressing it
2359 is VALUE.*/
2361 static void
2362 set_use_iv_cost (struct ivopts_data *data,
2363 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2364 bitmap depends_on, tree value)
2366 unsigned i, s;
2368 if (cost == INFTY)
2370 BITMAP_FREE (depends_on);
2371 return;
2374 if (data->consider_all_candidates)
2376 use->cost_map[cand->id].cand = cand;
2377 use->cost_map[cand->id].cost = cost;
2378 use->cost_map[cand->id].depends_on = depends_on;
2379 use->cost_map[cand->id].value = value;
2380 return;
2383 /* n_map_members is a power of two, so this computes modulo. */
2384 s = cand->id & (use->n_map_members - 1);
2385 for (i = s; i < use->n_map_members; i++)
2386 if (!use->cost_map[i].cand)
2387 goto found;
2388 for (i = 0; i < s; i++)
2389 if (!use->cost_map[i].cand)
2390 goto found;
2392 gcc_unreachable ();
2394 found:
2395 use->cost_map[i].cand = cand;
2396 use->cost_map[i].cost = cost;
2397 use->cost_map[i].depends_on = depends_on;
2398 use->cost_map[i].value = value;
2401 /* Gets cost of (USE, CANDIDATE) pair. */
2403 static struct cost_pair *
2404 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2405 struct iv_cand *cand)
2407 unsigned i, s;
2408 struct cost_pair *ret;
2410 if (!cand)
2411 return NULL;
2413 if (data->consider_all_candidates)
2415 ret = use->cost_map + cand->id;
2416 if (!ret->cand)
2417 return NULL;
2419 return ret;
2422 /* n_map_members is a power of two, so this computes modulo. */
2423 s = cand->id & (use->n_map_members - 1);
2424 for (i = s; i < use->n_map_members; i++)
2425 if (use->cost_map[i].cand == cand)
2426 return use->cost_map + i;
2428 for (i = 0; i < s; i++)
2429 if (use->cost_map[i].cand == cand)
2430 return use->cost_map + i;
2432 return NULL;
2435 /* Returns estimate on cost of computing SEQ. */
2437 static unsigned
2438 seq_cost (rtx seq)
2440 unsigned cost = 0;
2441 rtx set;
2443 for (; seq; seq = NEXT_INSN (seq))
2445 set = single_set (seq);
2446 if (set)
2447 cost += rtx_cost (set, SET);
2448 else
2449 cost++;
2452 return cost;
2455 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2456 static rtx
2457 produce_memory_decl_rtl (tree obj, int *regno)
2459 rtx x;
2461 gcc_assert (obj);
2462 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2464 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2465 x = gen_rtx_SYMBOL_REF (Pmode, name);
2467 else
2468 x = gen_raw_REG (Pmode, (*regno)++);
2470 return gen_rtx_MEM (DECL_MODE (obj), x);
2473 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2474 walk_tree. DATA contains the actual fake register number. */
2476 static tree
2477 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2479 tree obj = NULL_TREE;
2480 rtx x = NULL_RTX;
2481 int *regno = data;
2483 switch (TREE_CODE (*expr_p))
2485 case ADDR_EXPR:
2486 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2487 handled_component_p (*expr_p);
2488 expr_p = &TREE_OPERAND (*expr_p, 0))
2489 continue;
2490 obj = *expr_p;
2491 if (DECL_P (obj))
2492 x = produce_memory_decl_rtl (obj, regno);
2493 break;
2495 case SSA_NAME:
2496 *ws = 0;
2497 obj = SSA_NAME_VAR (*expr_p);
2498 if (!DECL_RTL_SET_P (obj))
2499 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2500 break;
2502 case VAR_DECL:
2503 case PARM_DECL:
2504 case RESULT_DECL:
2505 *ws = 0;
2506 obj = *expr_p;
2508 if (DECL_RTL_SET_P (obj))
2509 break;
2511 if (DECL_MODE (obj) == BLKmode)
2512 x = produce_memory_decl_rtl (obj, regno);
2513 else
2514 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2516 break;
2518 default:
2519 break;
2522 if (x)
2524 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2525 SET_DECL_RTL (obj, x);
2528 return NULL_TREE;
2531 /* Determines cost of the computation of EXPR. */
2533 static unsigned
2534 computation_cost (tree expr)
2536 rtx seq, rslt;
2537 tree type = TREE_TYPE (expr);
2538 unsigned cost;
2539 /* Avoid using hard regs in ways which may be unsupported. */
2540 int regno = LAST_VIRTUAL_REGISTER + 1;
2542 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2543 start_sequence ();
2544 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2545 seq = get_insns ();
2546 end_sequence ();
2548 cost = seq_cost (seq);
2549 if (MEM_P (rslt))
2550 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2552 return cost;
2555 /* Returns variable containing the value of candidate CAND at statement AT. */
2557 static tree
2558 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2560 if (stmt_after_increment (loop, cand, stmt))
2561 return cand->var_after;
2562 else
2563 return cand->var_before;
2566 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2567 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2570 tree_int_cst_sign_bit (tree t)
2572 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2573 unsigned HOST_WIDE_INT w;
2575 if (bitno < HOST_BITS_PER_WIDE_INT)
2576 w = TREE_INT_CST_LOW (t);
2577 else
2579 w = TREE_INT_CST_HIGH (t);
2580 bitno -= HOST_BITS_PER_WIDE_INT;
2583 return (w >> bitno) & 1;
2586 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2587 return cst. Otherwise return NULL_TREE. */
2589 static tree
2590 constant_multiple_of (tree type, tree top, tree bot)
2592 tree res, mby, p0, p1;
2593 enum tree_code code;
2594 bool negate;
2596 STRIP_NOPS (top);
2597 STRIP_NOPS (bot);
2599 if (operand_equal_p (top, bot, 0))
2600 return build_int_cst (type, 1);
2602 code = TREE_CODE (top);
2603 switch (code)
2605 case MULT_EXPR:
2606 mby = TREE_OPERAND (top, 1);
2607 if (TREE_CODE (mby) != INTEGER_CST)
2608 return NULL_TREE;
2610 res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2611 if (!res)
2612 return NULL_TREE;
2614 return fold_binary_to_constant (MULT_EXPR, type, res,
2615 fold_convert (type, mby));
2617 case PLUS_EXPR:
2618 case MINUS_EXPR:
2619 p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2620 if (!p0)
2621 return NULL_TREE;
2622 p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2623 if (!p1)
2624 return NULL_TREE;
2626 return fold_binary_to_constant (code, type, p0, p1);
2628 case INTEGER_CST:
2629 if (TREE_CODE (bot) != INTEGER_CST)
2630 return NULL_TREE;
2632 bot = fold_convert (type, bot);
2633 top = fold_convert (type, top);
2635 /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2636 the result afterwards. */
2637 if (tree_int_cst_sign_bit (bot))
2639 negate = true;
2640 bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2642 else
2643 negate = false;
2645 /* Ditto for TOP. */
2646 if (tree_int_cst_sign_bit (top))
2648 negate = !negate;
2649 top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2652 if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2653 return NULL_TREE;
2655 res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2656 if (negate)
2657 res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2658 return res;
2660 default:
2661 return NULL_TREE;
2665 /* Sets COMB to CST. */
2667 static void
2668 aff_combination_const (struct affine_tree_combination *comb, tree type,
2669 unsigned HOST_WIDE_INT cst)
2671 unsigned prec = TYPE_PRECISION (type);
2673 comb->type = type;
2674 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2676 comb->n = 0;
2677 comb->rest = NULL_TREE;
2678 comb->offset = cst & comb->mask;
2681 /* Sets COMB to single element ELT. */
2683 static void
2684 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2686 unsigned prec = TYPE_PRECISION (type);
2688 comb->type = type;
2689 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2691 comb->n = 1;
2692 comb->elts[0] = elt;
2693 comb->coefs[0] = 1;
2694 comb->rest = NULL_TREE;
2695 comb->offset = 0;
2698 /* Scales COMB by SCALE. */
2700 static void
2701 aff_combination_scale (struct affine_tree_combination *comb,
2702 unsigned HOST_WIDE_INT scale)
2704 unsigned i, j;
2706 if (scale == 1)
2707 return;
2709 if (scale == 0)
2711 aff_combination_const (comb, comb->type, 0);
2712 return;
2715 comb->offset = (scale * comb->offset) & comb->mask;
2716 for (i = 0, j = 0; i < comb->n; i++)
2718 comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2719 comb->elts[j] = comb->elts[i];
2720 if (comb->coefs[j] != 0)
2721 j++;
2723 comb->n = j;
2725 if (comb->rest)
2727 if (comb->n < MAX_AFF_ELTS)
2729 comb->coefs[comb->n] = scale;
2730 comb->elts[comb->n] = comb->rest;
2731 comb->rest = NULL_TREE;
2732 comb->n++;
2734 else
2735 comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2736 build_int_cst_type (comb->type, scale));
2740 /* Adds ELT * SCALE to COMB. */
2742 static void
2743 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2744 unsigned HOST_WIDE_INT scale)
2746 unsigned i;
2748 if (scale == 0)
2749 return;
2751 for (i = 0; i < comb->n; i++)
2752 if (operand_equal_p (comb->elts[i], elt, 0))
2754 comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2755 if (comb->coefs[i])
2756 return;
2758 comb->n--;
2759 comb->coefs[i] = comb->coefs[comb->n];
2760 comb->elts[i] = comb->elts[comb->n];
2761 return;
2763 if (comb->n < MAX_AFF_ELTS)
2765 comb->coefs[comb->n] = scale;
2766 comb->elts[comb->n] = elt;
2767 comb->n++;
2768 return;
2771 if (scale == 1)
2772 elt = fold_convert (comb->type, elt);
2773 else
2774 elt = fold_build2 (MULT_EXPR, comb->type,
2775 fold_convert (comb->type, elt),
2776 build_int_cst_type (comb->type, scale));
2778 if (comb->rest)
2779 comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2780 else
2781 comb->rest = elt;
2784 /* Adds COMB2 to COMB1. */
2786 static void
2787 aff_combination_add (struct affine_tree_combination *comb1,
2788 struct affine_tree_combination *comb2)
2790 unsigned i;
2792 comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2793 for (i = 0; i < comb2-> n; i++)
2794 aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2795 if (comb2->rest)
2796 aff_combination_add_elt (comb1, comb2->rest, 1);
2799 /* Splits EXPR into an affine combination of parts. */
2801 static void
2802 tree_to_aff_combination (tree expr, tree type,
2803 struct affine_tree_combination *comb)
2805 struct affine_tree_combination tmp;
2806 enum tree_code code;
2807 tree cst, core, toffset;
2808 HOST_WIDE_INT bitpos, bitsize;
2809 enum machine_mode mode;
2810 int unsignedp, volatilep;
2812 STRIP_NOPS (expr);
2814 code = TREE_CODE (expr);
2815 switch (code)
2817 case INTEGER_CST:
2818 aff_combination_const (comb, type, int_cst_value (expr));
2819 return;
2821 case PLUS_EXPR:
2822 case MINUS_EXPR:
2823 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2824 tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2825 if (code == MINUS_EXPR)
2826 aff_combination_scale (&tmp, -1);
2827 aff_combination_add (comb, &tmp);
2828 return;
2830 case MULT_EXPR:
2831 cst = TREE_OPERAND (expr, 1);
2832 if (TREE_CODE (cst) != INTEGER_CST)
2833 break;
2834 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2835 aff_combination_scale (comb, int_cst_value (cst));
2836 return;
2838 case NEGATE_EXPR:
2839 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2840 aff_combination_scale (comb, -1);
2841 return;
2843 case ADDR_EXPR:
2844 core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2845 &toffset, &mode, &unsignedp, &volatilep,
2846 false);
2847 if (bitpos % BITS_PER_UNIT != 0)
2848 break;
2849 aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2850 core = build_fold_addr_expr (core);
2851 if (TREE_CODE (core) == ADDR_EXPR)
2852 aff_combination_add_elt (comb, core, 1);
2853 else
2855 tree_to_aff_combination (core, type, &tmp);
2856 aff_combination_add (comb, &tmp);
2858 if (toffset)
2860 tree_to_aff_combination (toffset, type, &tmp);
2861 aff_combination_add (comb, &tmp);
2863 return;
2865 default:
2866 break;
2869 aff_combination_elt (comb, type, expr);
2872 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for width of TYPE. */
2874 static tree
2875 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2876 unsigned HOST_WIDE_INT mask)
2878 enum tree_code code;
2880 scale &= mask;
2881 elt = fold_convert (type, elt);
2883 if (scale == 1)
2885 if (!expr)
2886 return elt;
2888 return fold_build2 (PLUS_EXPR, type, expr, elt);
2891 if (scale == mask)
2893 if (!expr)
2894 return fold_build1 (NEGATE_EXPR, type, elt);
2896 return fold_build2 (MINUS_EXPR, type, expr, elt);
2899 if (!expr)
2900 return fold_build2 (MULT_EXPR, type, elt,
2901 build_int_cst_type (type, scale));
2903 if ((scale | (mask >> 1)) == mask)
2905 /* Scale is negative. */
2906 code = MINUS_EXPR;
2907 scale = (-scale) & mask;
2909 else
2910 code = PLUS_EXPR;
2912 elt = fold_build2 (MULT_EXPR, type, elt,
2913 build_int_cst_type (type, scale));
2914 return fold_build2 (code, type, expr, elt);
2917 /* Copies the tree elements of COMB to ensure that they are not shared. */
2919 static void
2920 unshare_aff_combination (struct affine_tree_combination *comb)
2922 unsigned i;
2924 for (i = 0; i < comb->n; i++)
2925 comb->elts[i] = unshare_expr (comb->elts[i]);
2926 if (comb->rest)
2927 comb->rest = unshare_expr (comb->rest);
2930 /* Makes tree from the affine combination COMB. */
2932 static tree
2933 aff_combination_to_tree (struct affine_tree_combination *comb)
2935 tree type = comb->type;
2936 tree expr = comb->rest;
2937 unsigned i;
2938 unsigned HOST_WIDE_INT off, sgn;
2940 /* Handle the special case produced by get_computation_aff when
2941 the type does not fit in HOST_WIDE_INT. */
2942 if (comb->n == 0 && comb->offset == 0)
2943 return fold_convert (type, expr);
2945 gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2947 for (i = 0; i < comb->n; i++)
2948 expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2949 comb->mask);
2951 if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2953 /* Offset is negative. */
2954 off = (-comb->offset) & comb->mask;
2955 sgn = comb->mask;
2957 else
2959 off = comb->offset;
2960 sgn = 1;
2962 return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2963 comb->mask);
2966 /* Determines the expression by that USE is expressed from induction variable
2967 CAND at statement AT in LOOP. The expression is stored in a decomposed
2968 form into AFF. Returns false if USE cannot be expressed using CAND. */
2970 static bool
2971 get_computation_aff (struct loop *loop,
2972 struct iv_use *use, struct iv_cand *cand, tree at,
2973 struct affine_tree_combination *aff)
2975 tree ubase = use->iv->base;
2976 tree ustep = use->iv->step;
2977 tree cbase = cand->iv->base;
2978 tree cstep = cand->iv->step;
2979 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2980 tree uutype;
2981 tree expr, delta;
2982 tree ratio;
2983 unsigned HOST_WIDE_INT ustepi, cstepi;
2984 HOST_WIDE_INT ratioi;
2985 struct affine_tree_combination cbase_aff, expr_aff;
2986 tree cstep_orig = cstep, ustep_orig = ustep;
2988 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2990 /* We do not have a precision to express the values of use. */
2991 return false;
2994 expr = var_at_stmt (loop, cand, at);
2996 if (TREE_TYPE (expr) != ctype)
2998 /* This may happen with the original ivs. */
2999 expr = fold_convert (ctype, expr);
3002 if (TYPE_UNSIGNED (utype))
3003 uutype = utype;
3004 else
3006 uutype = unsigned_type_for (utype);
3007 ubase = fold_convert (uutype, ubase);
3008 ustep = fold_convert (uutype, ustep);
3011 if (uutype != ctype)
3013 expr = fold_convert (uutype, expr);
3014 cbase = fold_convert (uutype, cbase);
3015 cstep = fold_convert (uutype, cstep);
3017 /* If the conversion is not noop, we must take it into account when
3018 considering the value of the step. */
3019 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3020 cstep_orig = cstep;
3023 if (cst_and_fits_in_hwi (cstep_orig)
3024 && cst_and_fits_in_hwi (ustep_orig))
3026 ustepi = int_cst_value (ustep_orig);
3027 cstepi = int_cst_value (cstep_orig);
3029 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3031 /* TODO maybe consider case when ustep divides cstep and the ratio is
3032 a power of 2 (so that the division is fast to execute)? We would
3033 need to be much more careful with overflows etc. then. */
3034 return false;
3037 ratio = build_int_cst_type (uutype, ratioi);
3039 else
3041 ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
3042 if (!ratio)
3043 return false;
3045 /* Ratioi is only used to detect special cases when the multiplicative
3046 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3047 we may set it to 0. We prefer cst_and_fits_in_hwi/int_cst_value
3048 to integer_onep/integer_all_onesp, since the former ignores
3049 TREE_OVERFLOW. */
3050 if (cst_and_fits_in_hwi (ratio))
3051 ratioi = int_cst_value (ratio);
3052 else if (integer_onep (ratio))
3053 ratioi = 1;
3054 else if (integer_all_onesp (ratio))
3055 ratioi = -1;
3056 else
3057 ratioi = 0;
3060 /* We may need to shift the value if we are after the increment. */
3061 if (stmt_after_increment (loop, cand, at))
3062 cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);
3064 /* use = ubase - ratio * cbase + ratio * var.
3066 In general case ubase + ratio * (var - cbase) could be better (one less
3067 multiplication), but often it is possible to eliminate redundant parts
3068 of computations from (ubase - ratio * cbase) term, and if it does not
3069 happen, fold is able to apply the distributive law to obtain this form
3070 anyway. */
3072 if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
3074 /* Let's compute in trees and just return the result in AFF. This case
3075 should not be very common, and fold itself is not that bad either,
3076 so making the aff. functions more complicated to handle this case
3077 is not that urgent. */
3078 if (ratioi == 1)
3080 delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
3081 expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3083 else if (ratioi == -1)
3085 delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
3086 expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3088 else
3090 delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
3091 delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
3092 expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3093 expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3096 aff->type = uutype;
3097 aff->n = 0;
3098 aff->offset = 0;
3099 aff->mask = 0;
3100 aff->rest = expr;
3101 return true;
3104 /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3105 possible to compute ratioi. */
3106 gcc_assert (ratioi);
3108 tree_to_aff_combination (ubase, uutype, aff);
3109 tree_to_aff_combination (cbase, uutype, &cbase_aff);
3110 tree_to_aff_combination (expr, uutype, &expr_aff);
3111 aff_combination_scale (&cbase_aff, -ratioi);
3112 aff_combination_scale (&expr_aff, ratioi);
3113 aff_combination_add (aff, &cbase_aff);
3114 aff_combination_add (aff, &expr_aff);
3116 return true;
3119 /* Determines the expression by that USE is expressed from induction variable
3120 CAND at statement AT in LOOP. The computation is unshared. */
3122 static tree
3123 get_computation_at (struct loop *loop,
3124 struct iv_use *use, struct iv_cand *cand, tree at)
3126 struct affine_tree_combination aff;
3127 tree type = TREE_TYPE (use->iv->base);
3129 if (!get_computation_aff (loop, use, cand, at, &aff))
3130 return NULL_TREE;
3131 unshare_aff_combination (&aff);
3132 return fold_convert (type, aff_combination_to_tree (&aff));
3135 /* Determines the expression by that USE is expressed from induction variable
3136 CAND in LOOP. The computation is unshared. */
3138 static tree
3139 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3141 return get_computation_at (loop, use, cand, use->stmt);
3144 /* Returns cost of addition in MODE. */
3146 static unsigned
3147 add_cost (enum machine_mode mode)
3149 static unsigned costs[NUM_MACHINE_MODES];
3150 rtx seq;
3151 unsigned cost;
3153 if (costs[mode])
3154 return costs[mode];
3156 start_sequence ();
3157 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3158 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3159 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3160 NULL_RTX);
3161 seq = get_insns ();
3162 end_sequence ();
3164 cost = seq_cost (seq);
3165 if (!cost)
3166 cost = 1;
3168 costs[mode] = cost;
3170 if (dump_file && (dump_flags & TDF_DETAILS))
3171 fprintf (dump_file, "Addition in %s costs %d\n",
3172 GET_MODE_NAME (mode), cost);
3173 return cost;
3176 /* Entry in a hashtable of already known costs for multiplication. */
3177 struct mbc_entry
3179 HOST_WIDE_INT cst; /* The constant to multiply by. */
3180 enum machine_mode mode; /* In mode. */
3181 unsigned cost; /* The cost. */
3184 /* Counts hash value for the ENTRY. */
3186 static hashval_t
3187 mbc_entry_hash (const void *entry)
3189 const struct mbc_entry *e = entry;
3191 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3194 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3196 static int
3197 mbc_entry_eq (const void *entry1, const void *entry2)
3199 const struct mbc_entry *e1 = entry1;
3200 const struct mbc_entry *e2 = entry2;
3202 return (e1->mode == e2->mode
3203 && e1->cst == e2->cst);
3206 /* Returns cost of multiplication by constant CST in MODE. */
3208 unsigned
3209 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3211 static htab_t costs;
3212 struct mbc_entry **cached, act;
3213 rtx seq;
3214 unsigned cost;
3216 if (!costs)
3217 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3219 act.mode = mode;
3220 act.cst = cst;
3221 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3222 if (*cached)
3223 return (*cached)->cost;
3225 *cached = xmalloc (sizeof (struct mbc_entry));
3226 (*cached)->mode = mode;
3227 (*cached)->cst = cst;
3229 start_sequence ();
3230 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3231 gen_int_mode (cst, mode), NULL_RTX, 0);
3232 seq = get_insns ();
3233 end_sequence ();
3235 cost = seq_cost (seq);
3237 if (dump_file && (dump_flags & TDF_DETAILS))
3238 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3239 (int) cst, GET_MODE_NAME (mode), cost);
3241 (*cached)->cost = cost;
3243 return cost;
3246 /* Returns true if multiplying by RATIO is allowed in address. */
3248 bool
3249 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3251 #define MAX_RATIO 128
3252 static sbitmap valid_mult;
3254 if (!valid_mult)
3256 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3257 rtx addr;
3258 HOST_WIDE_INT i;
3260 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3261 sbitmap_zero (valid_mult);
3262 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3263 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3265 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3266 if (memory_address_p (Pmode, addr))
3267 SET_BIT (valid_mult, i + MAX_RATIO);
3270 if (dump_file && (dump_flags & TDF_DETAILS))
3272 fprintf (dump_file, " allowed multipliers:");
3273 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3274 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3275 fprintf (dump_file, " %d", (int) i);
3276 fprintf (dump_file, "\n");
3277 fprintf (dump_file, "\n");
3281 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3282 return false;
3284 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3287 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3288 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3289 variable is omitted. The created memory accesses MODE.
3291 TODO -- there must be some better way. This all is quite crude. */
3293 static unsigned
3294 get_address_cost (bool symbol_present, bool var_present,
3295 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3297 static bool initialized = false;
3298 static HOST_WIDE_INT rat, off;
3299 static HOST_WIDE_INT min_offset, max_offset;
3300 static unsigned costs[2][2][2][2];
3301 unsigned cost, acost;
3302 rtx seq, addr, base;
3303 bool offset_p, ratio_p;
3304 rtx reg1;
3305 HOST_WIDE_INT s_offset;
3306 unsigned HOST_WIDE_INT mask;
3307 unsigned bits;
3309 if (!initialized)
3311 HOST_WIDE_INT i;
3312 initialized = true;
3314 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3316 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3317 for (i = 1; i <= 1 << 20; i <<= 1)
3319 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3320 if (!memory_address_p (Pmode, addr))
3321 break;
3323 max_offset = i >> 1;
3324 off = max_offset;
3326 for (i = 1; i <= 1 << 20; i <<= 1)
3328 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3329 if (!memory_address_p (Pmode, addr))
3330 break;
3332 min_offset = -(i >> 1);
3334 if (dump_file && (dump_flags & TDF_DETAILS))
3336 fprintf (dump_file, "get_address_cost:\n");
3337 fprintf (dump_file, " min offset %d\n", (int) min_offset);
3338 fprintf (dump_file, " max offset %d\n", (int) max_offset);
3341 rat = 1;
3342 for (i = 2; i <= MAX_RATIO; i++)
3343 if (multiplier_allowed_in_address_p (i))
3345 rat = i;
3346 break;
3350 bits = GET_MODE_BITSIZE (Pmode);
3351 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3352 offset &= mask;
3353 if ((offset >> (bits - 1) & 1))
3354 offset |= ~mask;
3355 s_offset = offset;
3357 cost = 0;
3358 offset_p = (s_offset != 0
3359 && min_offset <= s_offset && s_offset <= max_offset);
3360 ratio_p = (ratio != 1
3361 && multiplier_allowed_in_address_p (ratio));
3363 if (ratio != 1 && !ratio_p)
3364 cost += multiply_by_cost (ratio, Pmode);
3366 if (s_offset && !offset_p && !symbol_present)
3368 cost += add_cost (Pmode);
3369 var_present = true;
3372 acost = costs[symbol_present][var_present][offset_p][ratio_p];
3373 if (!acost)
3375 acost = 0;
3377 addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3378 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3379 if (ratio_p)
3380 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3382 if (var_present)
3383 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3385 if (symbol_present)
3387 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3388 if (offset_p)
3389 base = gen_rtx_fmt_e (CONST, Pmode,
3390 gen_rtx_fmt_ee (PLUS, Pmode,
3391 base,
3392 gen_int_mode (off, Pmode)));
3394 else if (offset_p)
3395 base = gen_int_mode (off, Pmode);
3396 else
3397 base = NULL_RTX;
3399 if (base)
3400 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3402 start_sequence ();
3403 addr = memory_address (Pmode, addr);
3404 seq = get_insns ();
3405 end_sequence ();
3407 acost = seq_cost (seq);
3408 acost += address_cost (addr, Pmode);
3410 if (!acost)
3411 acost = 1;
3412 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3415 return cost + acost;
3417 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3418 invariants the computation depends on. */
3420 static unsigned
3421 force_var_cost (struct ivopts_data *data,
3422 tree expr, bitmap *depends_on)
3424 static bool costs_initialized = false;
3425 static unsigned integer_cost;
3426 static unsigned symbol_cost;
3427 static unsigned address_cost;
3428 tree op0, op1;
3429 unsigned cost0, cost1, cost;
3430 enum machine_mode mode;
3432 if (!costs_initialized)
3434 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3435 rtx x = gen_rtx_MEM (DECL_MODE (var),
3436 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3437 tree addr;
3438 tree type = build_pointer_type (integer_type_node);
3440 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
3441 2000));
3443 SET_DECL_RTL (var, x);
3444 TREE_STATIC (var) = 1;
3445 addr = build1 (ADDR_EXPR, type, var);
3446 symbol_cost = computation_cost (addr) + 1;
3448 address_cost
3449 = computation_cost (build2 (PLUS_EXPR, type,
3450 addr,
3451 build_int_cst_type (type, 2000))) + 1;
3452 if (dump_file && (dump_flags & TDF_DETAILS))
3454 fprintf (dump_file, "force_var_cost:\n");
3455 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3456 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3457 fprintf (dump_file, " address %d\n", (int) address_cost);
3458 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3459 fprintf (dump_file, "\n");
3462 costs_initialized = true;
3465 STRIP_NOPS (expr);
3467 if (depends_on)
3469 fd_ivopts_data = data;
3470 walk_tree (&expr, find_depends, depends_on, NULL);
3473 if (SSA_VAR_P (expr))
3474 return 0;
3476 if (TREE_INVARIANT (expr))
3478 if (TREE_CODE (expr) == INTEGER_CST)
3479 return integer_cost;
3481 if (TREE_CODE (expr) == ADDR_EXPR)
3483 tree obj = TREE_OPERAND (expr, 0);
3485 if (TREE_CODE (obj) == VAR_DECL
3486 || TREE_CODE (obj) == PARM_DECL
3487 || TREE_CODE (obj) == RESULT_DECL)
3488 return symbol_cost;
3491 return address_cost;
3494 switch (TREE_CODE (expr))
3496 case PLUS_EXPR:
3497 case MINUS_EXPR:
3498 case MULT_EXPR:
3499 op0 = TREE_OPERAND (expr, 0);
3500 op1 = TREE_OPERAND (expr, 1);
3501 STRIP_NOPS (op0);
3502 STRIP_NOPS (op1);
3504 if (is_gimple_val (op0))
3505 cost0 = 0;
3506 else
3507 cost0 = force_var_cost (data, op0, NULL);
3509 if (is_gimple_val (op1))
3510 cost1 = 0;
3511 else
3512 cost1 = force_var_cost (data, op1, NULL);
3514 break;
3516 default:
3517 /* Just an arbitrary value, FIXME. */
3518 return target_spill_cost;
3521 mode = TYPE_MODE (TREE_TYPE (expr));
3522 switch (TREE_CODE (expr))
3524 case PLUS_EXPR:
3525 case MINUS_EXPR:
3526 cost = add_cost (mode);
3527 break;
3529 case MULT_EXPR:
3530 if (cst_and_fits_in_hwi (op0))
3531 cost = multiply_by_cost (int_cst_value (op0), mode);
3532 else if (cst_and_fits_in_hwi (op1))
3533 cost = multiply_by_cost (int_cst_value (op1), mode);
3534 else
3535 return target_spill_cost;
3536 break;
3538 default:
3539 gcc_unreachable ();
3542 cost += cost0;
3543 cost += cost1;
3545 /* Bound the cost by target_spill_cost. The parts of complicated
3546 computations often are either loop invariant or at least can
3547 be shared between several iv uses, so letting this grow without
3548 limits would not give reasonable results. */
3549 return cost < target_spill_cost ? cost : target_spill_cost;
3552 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3553 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3554 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3555 invariants the computation depends on. */
3557 static unsigned
3558 split_address_cost (struct ivopts_data *data,
3559 tree addr, bool *symbol_present, bool *var_present,
3560 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3562 tree core;
3563 HOST_WIDE_INT bitsize;
3564 HOST_WIDE_INT bitpos;
3565 tree toffset;
3566 enum machine_mode mode;
3567 int unsignedp, volatilep;
3569 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3570 &unsignedp, &volatilep, false);
3572 if (toffset != 0
3573 || bitpos % BITS_PER_UNIT != 0
3574 || TREE_CODE (core) != VAR_DECL)
3576 *symbol_present = false;
3577 *var_present = true;
3578 fd_ivopts_data = data;
3579 walk_tree (&addr, find_depends, depends_on, NULL);
3580 return target_spill_cost;
3583 *offset += bitpos / BITS_PER_UNIT;
3584 if (TREE_STATIC (core)
3585 || DECL_EXTERNAL (core))
3587 *symbol_present = true;
3588 *var_present = false;
3589 return 0;
3592 *symbol_present = false;
3593 *var_present = true;
3594 return 0;
3597 /* Estimates cost of expressing difference of addresses E1 - E2 as
3598 var + symbol + offset. The value of offset is added to OFFSET,
3599 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3600 part is missing. DEPENDS_ON is a set of the invariants the computation
3601 depends on. */
3603 static unsigned
3604 ptr_difference_cost (struct ivopts_data *data,
3605 tree e1, tree e2, bool *symbol_present, bool *var_present,
3606 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3608 HOST_WIDE_INT diff = 0;
3609 unsigned cost;
3611 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3613 if (ptr_difference_const (e1, e2, &diff))
3615 *offset += diff;
3616 *symbol_present = false;
3617 *var_present = false;
3618 return 0;
3621 if (e2 == integer_zero_node)
3622 return split_address_cost (data, TREE_OPERAND (e1, 0),
3623 symbol_present, var_present, offset, depends_on);
3625 *symbol_present = false;
3626 *var_present = true;
3628 cost = force_var_cost (data, e1, depends_on);
3629 cost += force_var_cost (data, e2, depends_on);
3630 cost += add_cost (Pmode);
3632 return cost;
3635 /* Estimates cost of expressing difference E1 - E2 as
3636 var + symbol + offset. The value of offset is added to OFFSET,
3637 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3638 part is missing. DEPENDS_ON is a set of the invariants the computation
3639 depends on. */
3641 static unsigned
3642 difference_cost (struct ivopts_data *data,
3643 tree e1, tree e2, bool *symbol_present, bool *var_present,
3644 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3646 unsigned cost;
3647 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3648 unsigned HOST_WIDE_INT off1, off2;
3650 e1 = strip_offset (e1, &off1);
3651 e2 = strip_offset (e2, &off2);
3652 *offset += off1 - off2;
3654 STRIP_NOPS (e1);
3655 STRIP_NOPS (e2);
3657 if (TREE_CODE (e1) == ADDR_EXPR)
3658 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3659 depends_on);
3660 *symbol_present = false;
3662 if (operand_equal_p (e1, e2, 0))
3664 *var_present = false;
3665 return 0;
3667 *var_present = true;
3668 if (zero_p (e2))
3669 return force_var_cost (data, e1, depends_on);
3671 if (zero_p (e1))
3673 cost = force_var_cost (data, e2, depends_on);
3674 cost += multiply_by_cost (-1, mode);
3676 return cost;
3679 cost = force_var_cost (data, e1, depends_on);
3680 cost += force_var_cost (data, e2, depends_on);
3681 cost += add_cost (mode);
3683 return cost;
3686 /* Determines the cost of the computation by that USE is expressed
3687 from induction variable CAND. If ADDRESS_P is true, we just need
3688 to create an address from it, otherwise we want to get it into
3689 register. A set of invariants we depend on is stored in
3690 DEPENDS_ON. AT is the statement at that the value is computed. */
3692 static unsigned
3693 get_computation_cost_at (struct ivopts_data *data,
3694 struct iv_use *use, struct iv_cand *cand,
3695 bool address_p, bitmap *depends_on, tree at)
3697 tree ubase = use->iv->base, ustep = use->iv->step;
3698 tree cbase, cstep;
3699 tree utype = TREE_TYPE (ubase), ctype;
3700 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3701 HOST_WIDE_INT ratio, aratio;
3702 bool var_present, symbol_present;
3703 unsigned cost = 0, n_sums;
3705 *depends_on = NULL;
3707 /* Only consider real candidates. */
3708 if (!cand->iv)
3709 return INFTY;
3711 cbase = cand->iv->base;
3712 cstep = cand->iv->step;
3713 ctype = TREE_TYPE (cbase);
3715 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3717 /* We do not have a precision to express the values of use. */
3718 return INFTY;
3721 if (address_p)
3723 /* Do not try to express address of an object with computation based
3724 on address of a different object. This may cause problems in rtl
3725 level alias analysis (that does not expect this to be happening,
3726 as this is illegal in C), and would be unlikely to be useful
3727 anyway. */
3728 if (use->iv->base_object
3729 && cand->iv->base_object
3730 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3731 return INFTY;
3734 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3736 /* TODO -- add direct handling of this case. */
3737 goto fallback;
3740 /* CSTEPI is removed from the offset in case statement is after the
3741 increment. If the step is not constant, we use zero instead.
3742 This is a bit imprecise (there is the extra addition), but
3743 redundancy elimination is likely to transform the code so that
3744 it uses value of the variable before increment anyway,
3745 so it is not that much unrealistic. */
3746 if (cst_and_fits_in_hwi (cstep))
3747 cstepi = int_cst_value (cstep);
3748 else
3749 cstepi = 0;
3751 if (cst_and_fits_in_hwi (ustep)
3752 && cst_and_fits_in_hwi (cstep))
3754 ustepi = int_cst_value (ustep);
3756 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3757 return INFTY;
3759 else
3761 tree rat;
3763 rat = constant_multiple_of (utype, ustep, cstep);
3765 if (!rat)
3766 return INFTY;
3768 if (cst_and_fits_in_hwi (rat))
3769 ratio = int_cst_value (rat);
3770 else if (integer_onep (rat))
3771 ratio = 1;
3772 else if (integer_all_onesp (rat))
3773 ratio = -1;
3774 else
3775 return INFTY;
3778 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3779 or ratio == 1, it is better to handle this like
3781 ubase - ratio * cbase + ratio * var
3783 (also holds in the case ratio == -1, TODO. */
3785 if (cst_and_fits_in_hwi (cbase))
3787 offset = - ratio * int_cst_value (cbase);
3788 cost += difference_cost (data,
3789 ubase, integer_zero_node,
3790 &symbol_present, &var_present, &offset,
3791 depends_on);
3793 else if (ratio == 1)
3795 cost += difference_cost (data,
3796 ubase, cbase,
3797 &symbol_present, &var_present, &offset,
3798 depends_on);
3800 else
3802 cost += force_var_cost (data, cbase, depends_on);
3803 cost += add_cost (TYPE_MODE (ctype));
3804 cost += difference_cost (data,
3805 ubase, integer_zero_node,
3806 &symbol_present, &var_present, &offset,
3807 depends_on);
3810 /* If we are after the increment, the value of the candidate is higher by
3811 one iteration. */
3812 if (stmt_after_increment (data->current_loop, cand, at))
3813 offset -= ratio * cstepi;
3815 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3816 (symbol/var/const parts may be omitted). If we are looking for an address,
3817 find the cost of addressing this. */
3818 if (address_p)
3819 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3821 /* Otherwise estimate the costs for computing the expression. */
3822 aratio = ratio > 0 ? ratio : -ratio;
3823 if (!symbol_present && !var_present && !offset)
3825 if (ratio != 1)
3826 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3828 return cost;
3831 if (aratio != 1)
3832 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3834 n_sums = 1;
3835 if (var_present
3836 /* Symbol + offset should be compile-time computable. */
3837 && (symbol_present || offset))
3838 n_sums++;
3840 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3842 fallback:
3844 /* Just get the expression, expand it and measure the cost. */
3845 tree comp = get_computation_at (data->current_loop, use, cand, at);
3847 if (!comp)
3848 return INFTY;
3850 if (address_p)
3851 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3853 return computation_cost (comp);
3857 /* Determines the cost of the computation by that USE is expressed
3858 from induction variable CAND. If ADDRESS_P is true, we just need
3859 to create an address from it, otherwise we want to get it into
3860 register. A set of invariants we depend on is stored in
3861 DEPENDS_ON. */
3863 static unsigned
3864 get_computation_cost (struct ivopts_data *data,
3865 struct iv_use *use, struct iv_cand *cand,
3866 bool address_p, bitmap *depends_on)
3868 return get_computation_cost_at (data,
3869 use, cand, address_p, depends_on, use->stmt);
3872 /* Determines cost of basing replacement of USE on CAND in a generic
3873 expression. */
3875 static bool
3876 determine_use_iv_cost_generic (struct ivopts_data *data,
3877 struct iv_use *use, struct iv_cand *cand)
3879 bitmap depends_on;
3880 unsigned cost;
3882 /* The simple case first -- if we need to express value of the preserved
3883 original biv, the cost is 0. This also prevents us from counting the
3884 cost of increment twice -- once at this use and once in the cost of
3885 the candidate. */
3886 if (cand->pos == IP_ORIGINAL
3887 && cand->incremented_at == use->stmt)
3889 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3890 return true;
3893 cost = get_computation_cost (data, use, cand, false, &depends_on);
3894 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3896 return cost != INFTY;
3899 /* Determines cost of basing replacement of USE on CAND in an address. */
3901 static bool
3902 determine_use_iv_cost_address (struct ivopts_data *data,
3903 struct iv_use *use, struct iv_cand *cand)
3905 bitmap depends_on;
3906 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3908 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3910 return cost != INFTY;
3913 /* Computes value of induction variable IV in iteration NITER. */
3915 static tree
3916 iv_value (struct iv *iv, tree niter)
3918 tree val;
3919 tree type = TREE_TYPE (iv->base);
3921 niter = fold_convert (type, niter);
3922 val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3924 return fold_build2 (PLUS_EXPR, type, iv->base, val);
3927 /* Computes value of candidate CAND at position AT in iteration NITER. */
3929 static tree
3930 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3932 tree val = iv_value (cand->iv, niter);
3933 tree type = TREE_TYPE (cand->iv->base);
3935 if (stmt_after_increment (loop, cand, at))
3936 val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3938 return val;
3941 /* Returns period of induction variable iv. */
3943 static tree
3944 iv_period (struct iv *iv)
3946 tree step = iv->step, period, type;
3947 tree pow2div;
3949 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3951 /* Period of the iv is gcd (step, type range). Since type range is power
3952 of two, it suffices to determine the maximum power of two that divides
3953 step. */
3954 pow2div = num_ending_zeros (step);
3955 type = unsigned_type_for (TREE_TYPE (step));
3957 period = build_low_bits_mask (type,
3958 (TYPE_PRECISION (type)
3959 - tree_low_cst (pow2div, 1)));
3961 return period;
3964 /* Returns the comparison operator used when eliminating the iv USE. */
3966 static enum tree_code
3967 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3969 struct loop *loop = data->current_loop;
3970 basic_block ex_bb;
3971 edge exit;
3973 ex_bb = bb_for_stmt (use->stmt);
3974 exit = EDGE_SUCC (ex_bb, 0);
3975 if (flow_bb_inside_loop_p (loop, exit->dest))
3976 exit = EDGE_SUCC (ex_bb, 1);
3978 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3981 /* Check whether it is possible to express the condition in USE by comparison
3982 of candidate CAND. If so, store the value compared with to BOUND. */
3984 static bool
3985 may_eliminate_iv (struct ivopts_data *data,
3986 struct iv_use *use, struct iv_cand *cand, tree *bound)
3988 basic_block ex_bb;
3989 edge exit;
3990 struct tree_niter_desc *niter;
3991 tree nit, nit_type;
3992 tree wider_type, period, per_type;
3993 struct loop *loop = data->current_loop;
3995 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3996 return false;
3998 /* For now works only for exits that dominate the loop latch. TODO -- extend
3999 for other conditions inside loop body. */
4000 ex_bb = bb_for_stmt (use->stmt);
4001 if (use->stmt != last_stmt (ex_bb)
4002 || TREE_CODE (use->stmt) != COND_EXPR)
4003 return false;
4004 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4005 return false;
4007 exit = EDGE_SUCC (ex_bb, 0);
4008 if (flow_bb_inside_loop_p (loop, exit->dest))
4009 exit = EDGE_SUCC (ex_bb, 1);
4010 if (flow_bb_inside_loop_p (loop, exit->dest))
4011 return false;
4013 niter = niter_for_exit (data, exit);
4014 if (!niter
4015 || !zero_p (niter->may_be_zero))
4016 return false;
4018 nit = niter->niter;
4019 nit_type = TREE_TYPE (nit);
4021 /* Determine whether we may use the variable to test whether niter iterations
4022 elapsed. This is the case iff the period of the induction variable is
4023 greater than the number of iterations. */
4024 period = iv_period (cand->iv);
4025 if (!period)
4026 return false;
4027 per_type = TREE_TYPE (period);
4029 wider_type = TREE_TYPE (period);
4030 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4031 wider_type = per_type;
4032 else
4033 wider_type = nit_type;
4035 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4036 fold_convert (wider_type, period),
4037 fold_convert (wider_type, nit))))
4038 return false;
4040 *bound = cand_value_at (loop, cand, use->stmt, nit);
4041 return true;
4044 /* Determines cost of basing replacement of USE on CAND in a condition. */
4046 static bool
4047 determine_use_iv_cost_condition (struct ivopts_data *data,
4048 struct iv_use *use, struct iv_cand *cand)
4050 tree bound = NULL_TREE, op, cond;
4051 bitmap depends_on = NULL;
4052 unsigned cost;
4054 /* Only consider real candidates. */
4055 if (!cand->iv)
4057 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4058 return false;
4061 if (may_eliminate_iv (data, use, cand, &bound))
4063 cost = force_var_cost (data, bound, &depends_on);
4065 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4066 return cost != INFTY;
4069 /* The induction variable elimination failed; just express the original
4070 giv. If it is compared with an invariant, note that we cannot get
4071 rid of it. */
4072 cost = get_computation_cost (data, use, cand, false, &depends_on);
4074 cond = *use->op_p;
4075 if (TREE_CODE (cond) != SSA_NAME)
4077 op = TREE_OPERAND (cond, 0);
4078 if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4079 op = TREE_OPERAND (cond, 1);
4080 if (TREE_CODE (op) == SSA_NAME)
4082 op = get_iv (data, op)->base;
4083 fd_ivopts_data = data;
4084 walk_tree (&op, find_depends, &depends_on, NULL);
4088 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4089 return cost != INFTY;
4092 /* Checks whether it is possible to replace the final value of USE by
4093 a direct computation. If so, the formula is stored to *VALUE. */
4095 static bool
4096 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
4097 tree *value)
4099 struct loop *loop = data->current_loop;
4100 edge exit;
4101 struct tree_niter_desc *niter;
4103 exit = single_dom_exit (loop);
4104 if (!exit)
4105 return false;
4107 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
4108 bb_for_stmt (use->stmt)));
4110 niter = niter_for_single_dom_exit (data);
4111 if (!niter
4112 || !zero_p (niter->may_be_zero))
4113 return false;
4115 *value = iv_value (use->iv, niter->niter);
4117 return true;
4120 /* Determines cost of replacing final value of USE using CAND. */
4122 static bool
4123 determine_use_iv_cost_outer (struct ivopts_data *data,
4124 struct iv_use *use, struct iv_cand *cand)
4126 bitmap depends_on;
4127 unsigned cost;
4128 edge exit;
4129 tree value = NULL_TREE;
4130 struct loop *loop = data->current_loop;
4132 /* The simple case first -- if we need to express value of the preserved
4133 original biv, the cost is 0. This also prevents us from counting the
4134 cost of increment twice -- once at this use and once in the cost of
4135 the candidate. */
4136 if (cand->pos == IP_ORIGINAL
4137 && cand->incremented_at == use->stmt)
4139 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4140 return true;
4143 if (!cand->iv)
4145 if (!may_replace_final_value (data, use, &value))
4147 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4148 return false;
4151 depends_on = NULL;
4152 cost = force_var_cost (data, value, &depends_on);
4154 cost /= AVG_LOOP_NITER (loop);
4156 set_use_iv_cost (data, use, cand, cost, depends_on, value);
4157 return cost != INFTY;
4160 exit = single_dom_exit (loop);
4161 if (exit)
4163 /* If there is just a single exit, we may use value of the candidate
4164 after we take it to determine the value of use. */
4165 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
4166 last_stmt (exit->src));
4167 if (cost != INFTY)
4168 cost /= AVG_LOOP_NITER (loop);
4170 else
4172 /* Otherwise we just need to compute the iv. */
4173 cost = get_computation_cost (data, use, cand, false, &depends_on);
4176 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4178 return cost != INFTY;
4181 /* Determines cost of basing replacement of USE on CAND. Returns false
4182 if USE cannot be based on CAND. */
4184 static bool
4185 determine_use_iv_cost (struct ivopts_data *data,
4186 struct iv_use *use, struct iv_cand *cand)
4188 switch (use->type)
4190 case USE_NONLINEAR_EXPR:
4191 return determine_use_iv_cost_generic (data, use, cand);
4193 case USE_OUTER:
4194 return determine_use_iv_cost_outer (data, use, cand);
4196 case USE_ADDRESS:
4197 return determine_use_iv_cost_address (data, use, cand);
4199 case USE_COMPARE:
4200 return determine_use_iv_cost_condition (data, use, cand);
4202 default:
4203 gcc_unreachable ();
4207 /* Determines costs of basing the use of the iv on an iv candidate. */
4209 static void
4210 determine_use_iv_costs (struct ivopts_data *data)
4212 unsigned i, j;
4213 struct iv_use *use;
4214 struct iv_cand *cand;
4215 bitmap to_clear = BITMAP_ALLOC (NULL);
4217 alloc_use_cost_map (data);
4219 for (i = 0; i < n_iv_uses (data); i++)
4221 use = iv_use (data, i);
4223 if (data->consider_all_candidates)
4225 for (j = 0; j < n_iv_cands (data); j++)
4227 cand = iv_cand (data, j);
4228 determine_use_iv_cost (data, use, cand);
4231 else
4233 bitmap_iterator bi;
4235 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4237 cand = iv_cand (data, j);
4238 if (!determine_use_iv_cost (data, use, cand))
4239 bitmap_set_bit (to_clear, j);
4242 /* Remove the candidates for that the cost is infinite from
4243 the list of related candidates. */
4244 bitmap_and_compl_into (use->related_cands, to_clear);
4245 bitmap_clear (to_clear);
4249 BITMAP_FREE (to_clear);
4251 if (dump_file && (dump_flags & TDF_DETAILS))
4253 fprintf (dump_file, "Use-candidate costs:\n");
4255 for (i = 0; i < n_iv_uses (data); i++)
4257 use = iv_use (data, i);
4259 fprintf (dump_file, "Use %d:\n", i);
4260 fprintf (dump_file, " cand\tcost\tdepends on\n");
4261 for (j = 0; j < use->n_map_members; j++)
4263 if (!use->cost_map[j].cand
4264 || use->cost_map[j].cost == INFTY)
4265 continue;
4267 fprintf (dump_file, " %d\t%d\t",
4268 use->cost_map[j].cand->id,
4269 use->cost_map[j].cost);
4270 if (use->cost_map[j].depends_on)
4271 bitmap_print (dump_file,
4272 use->cost_map[j].depends_on, "","");
4273 fprintf (dump_file, "\n");
4276 fprintf (dump_file, "\n");
4278 fprintf (dump_file, "\n");
4282 /* Determines cost of the candidate CAND. */
4284 static void
4285 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4287 unsigned cost_base, cost_step;
4288 tree base;
4290 if (!cand->iv)
4292 cand->cost = 0;
4293 return;
4296 /* There are two costs associated with the candidate -- its increment
4297 and its initialization. The second is almost negligible for any loop
4298 that rolls enough, so we take it just very little into account. */
4300 base = cand->iv->base;
4301 cost_base = force_var_cost (data, base, NULL);
4302 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4304 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4306 /* Prefer the original iv unless we may gain something by replacing it;
4307 this is not really relevant for artificial ivs created by other
4308 passes. */
4309 if (cand->pos == IP_ORIGINAL
4310 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4311 cand->cost--;
4313 /* Prefer not to insert statements into latch unless there are some
4314 already (so that we do not create unnecessary jumps). */
4315 if (cand->pos == IP_END
4316 && empty_block_p (ip_end_pos (data->current_loop)))
4317 cand->cost++;
4320 /* Determines costs of computation of the candidates. */
4322 static void
4323 determine_iv_costs (struct ivopts_data *data)
4325 unsigned i;
4327 if (dump_file && (dump_flags & TDF_DETAILS))
4329 fprintf (dump_file, "Candidate costs:\n");
4330 fprintf (dump_file, " cand\tcost\n");
4333 for (i = 0; i < n_iv_cands (data); i++)
4335 struct iv_cand *cand = iv_cand (data, i);
4337 determine_iv_cost (data, cand);
4339 if (dump_file && (dump_flags & TDF_DETAILS))
4340 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4343 if (dump_file && (dump_flags & TDF_DETAILS))
4344 fprintf (dump_file, "\n");
4347 /* Calculates cost for having SIZE induction variables. */
4349 static unsigned
4350 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4352 return global_cost_for_size (size,
4353 loop_data (data->current_loop)->regs_used,
4354 n_iv_uses (data));
4357 /* For each size of the induction variable set determine the penalty. */
4359 static void
4360 determine_set_costs (struct ivopts_data *data)
4362 unsigned j, n;
4363 tree phi, op;
4364 struct loop *loop = data->current_loop;
4365 bitmap_iterator bi;
4367 /* We use the following model (definitely improvable, especially the
4368 cost function -- TODO):
4370 We estimate the number of registers available (using MD data), name it A.
4372 We estimate the number of registers used by the loop, name it U. This
4373 number is obtained as the number of loop phi nodes (not counting virtual
4374 registers and bivs) + the number of variables from outside of the loop.
4376 We set a reserve R (free regs that are used for temporary computations,
4377 etc.). For now the reserve is a constant 3.
4379 Let I be the number of induction variables.
4381 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4382 make a lot of ivs without a reason).
4383 -- if A - R < U + I <= A, the cost is I * PRES_COST
4384 -- if U + I > A, the cost is I * PRES_COST and
4385 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4387 if (dump_file && (dump_flags & TDF_DETAILS))
4389 fprintf (dump_file, "Global costs:\n");
4390 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4391 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
4392 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
4393 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
4396 n = 0;
4397 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4399 op = PHI_RESULT (phi);
4401 if (!is_gimple_reg (op))
4402 continue;
4404 if (get_iv (data, op))
4405 continue;
4407 n++;
4410 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4412 struct version_info *info = ver_info (data, j);
4414 if (info->inv_id && info->has_nonlin_use)
4415 n++;
4418 loop_data (loop)->regs_used = n;
4419 if (dump_file && (dump_flags & TDF_DETAILS))
4420 fprintf (dump_file, " regs_used %d\n", n);
4422 if (dump_file && (dump_flags & TDF_DETAILS))
4424 fprintf (dump_file, " cost for size:\n");
4425 fprintf (dump_file, " ivs\tcost\n");
4426 for (j = 0; j <= 2 * target_avail_regs; j++)
4427 fprintf (dump_file, " %d\t%d\n", j,
4428 ivopts_global_cost_for_size (data, j));
4429 fprintf (dump_file, "\n");
4433 /* Returns true if A is a cheaper cost pair than B. */
4435 static bool
4436 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4438 if (!a)
4439 return false;
4441 if (!b)
4442 return true;
4444 if (a->cost < b->cost)
4445 return true;
4447 if (a->cost > b->cost)
4448 return false;
4450 /* In case the costs are the same, prefer the cheaper candidate. */
4451 if (a->cand->cost < b->cand->cost)
4452 return true;
4454 return false;
4457 /* Computes the cost field of IVS structure. */
4459 static void
4460 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4462 unsigned cost = 0;
4464 cost += ivs->cand_use_cost;
4465 cost += ivs->cand_cost;
4466 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4468 ivs->cost = cost;
4471 /* Remove invariants in set INVS to set IVS. */
4473 static void
4474 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4476 bitmap_iterator bi;
4477 unsigned iid;
4479 if (!invs)
4480 return;
4482 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4484 ivs->n_invariant_uses[iid]--;
4485 if (ivs->n_invariant_uses[iid] == 0)
4486 ivs->n_regs--;
4490 /* Set USE not to be expressed by any candidate in IVS. */
4492 static void
4493 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4494 struct iv_use *use)
4496 unsigned uid = use->id, cid;
4497 struct cost_pair *cp;
4499 cp = ivs->cand_for_use[uid];
4500 if (!cp)
4501 return;
4502 cid = cp->cand->id;
4504 ivs->bad_uses++;
4505 ivs->cand_for_use[uid] = NULL;
4506 ivs->n_cand_uses[cid]--;
4508 if (ivs->n_cand_uses[cid] == 0)
4510 bitmap_clear_bit (ivs->cands, cid);
4511 /* Do not count the pseudocandidates. */
4512 if (cp->cand->iv)
4513 ivs->n_regs--;
4514 ivs->n_cands--;
4515 ivs->cand_cost -= cp->cand->cost;
4517 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4520 ivs->cand_use_cost -= cp->cost;
4522 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4523 iv_ca_recount_cost (data, ivs);
4526 /* Add invariants in set INVS to set IVS. */
4528 static void
4529 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4531 bitmap_iterator bi;
4532 unsigned iid;
4534 if (!invs)
4535 return;
4537 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4539 ivs->n_invariant_uses[iid]++;
4540 if (ivs->n_invariant_uses[iid] == 1)
4541 ivs->n_regs++;
4545 /* Set cost pair for USE in set IVS to CP. */
4547 static void
4548 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4549 struct iv_use *use, struct cost_pair *cp)
4551 unsigned uid = use->id, cid;
4553 if (ivs->cand_for_use[uid] == cp)
4554 return;
4556 if (ivs->cand_for_use[uid])
4557 iv_ca_set_no_cp (data, ivs, use);
4559 if (cp)
4561 cid = cp->cand->id;
4563 ivs->bad_uses--;
4564 ivs->cand_for_use[uid] = cp;
4565 ivs->n_cand_uses[cid]++;
4566 if (ivs->n_cand_uses[cid] == 1)
4568 bitmap_set_bit (ivs->cands, cid);
4569 /* Do not count the pseudocandidates. */
4570 if (cp->cand->iv)
4571 ivs->n_regs++;
4572 ivs->n_cands++;
4573 ivs->cand_cost += cp->cand->cost;
4575 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4578 ivs->cand_use_cost += cp->cost;
4579 iv_ca_set_add_invariants (ivs, cp->depends_on);
4580 iv_ca_recount_cost (data, ivs);
4584 /* Extend set IVS by expressing USE by some of the candidates in it
4585 if possible. */
4587 static void
4588 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4589 struct iv_use *use)
4591 struct cost_pair *best_cp = NULL, *cp;
4592 bitmap_iterator bi;
4593 unsigned i;
4595 gcc_assert (ivs->upto >= use->id);
4597 if (ivs->upto == use->id)
4599 ivs->upto++;
4600 ivs->bad_uses++;
4603 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4605 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4607 if (cheaper_cost_pair (cp, best_cp))
4608 best_cp = cp;
4611 iv_ca_set_cp (data, ivs, use, best_cp);
4614 /* Get cost for assignment IVS. */
4616 static unsigned
4617 iv_ca_cost (struct iv_ca *ivs)
4619 return (ivs->bad_uses ? INFTY : ivs->cost);
4622 /* Returns true if all dependences of CP are among invariants in IVS. */
4624 static bool
4625 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4627 unsigned i;
4628 bitmap_iterator bi;
4630 if (!cp->depends_on)
4631 return true;
4633 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4635 if (ivs->n_invariant_uses[i] == 0)
4636 return false;
4639 return true;
4642 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4643 it before NEXT_CHANGE. */
4645 static struct iv_ca_delta *
4646 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4647 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4649 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4651 change->use = use;
4652 change->old_cp = old_cp;
4653 change->new_cp = new_cp;
4654 change->next_change = next_change;
4656 return change;
4659 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4660 are rewritten. */
4662 static struct iv_ca_delta *
4663 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4665 struct iv_ca_delta *last;
4667 if (!l2)
4668 return l1;
4670 if (!l1)
4671 return l2;
4673 for (last = l1; last->next_change; last = last->next_change)
4674 continue;
4675 last->next_change = l2;
4677 return l1;
4680 /* Returns candidate by that USE is expressed in IVS. */
4682 static struct cost_pair *
4683 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4685 return ivs->cand_for_use[use->id];
4688 /* Reverse the list of changes DELTA, forming the inverse to it. */
4690 static struct iv_ca_delta *
4691 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4693 struct iv_ca_delta *act, *next, *prev = NULL;
4694 struct cost_pair *tmp;
4696 for (act = delta; act; act = next)
4698 next = act->next_change;
4699 act->next_change = prev;
4700 prev = act;
4702 tmp = act->old_cp;
4703 act->old_cp = act->new_cp;
4704 act->new_cp = tmp;
4707 return prev;
4710 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4711 reverted instead. */
4713 static void
4714 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4715 struct iv_ca_delta *delta, bool forward)
4717 struct cost_pair *from, *to;
4718 struct iv_ca_delta *act;
4720 if (!forward)
4721 delta = iv_ca_delta_reverse (delta);
4723 for (act = delta; act; act = act->next_change)
4725 from = act->old_cp;
4726 to = act->new_cp;
4727 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4728 iv_ca_set_cp (data, ivs, act->use, to);
4731 if (!forward)
4732 iv_ca_delta_reverse (delta);
4735 /* Returns true if CAND is used in IVS. */
4737 static bool
4738 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4740 return ivs->n_cand_uses[cand->id] > 0;
4743 /* Returns number of induction variable candidates in the set IVS. */
4745 static unsigned
4746 iv_ca_n_cands (struct iv_ca *ivs)
4748 return ivs->n_cands;
4751 /* Free the list of changes DELTA. */
4753 static void
4754 iv_ca_delta_free (struct iv_ca_delta **delta)
4756 struct iv_ca_delta *act, *next;
4758 for (act = *delta; act; act = next)
4760 next = act->next_change;
4761 free (act);
4764 *delta = NULL;
4767 /* Allocates new iv candidates assignment. */
4769 static struct iv_ca *
4770 iv_ca_new (struct ivopts_data *data)
4772 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4774 nw->upto = 0;
4775 nw->bad_uses = 0;
4776 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4777 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4778 nw->cands = BITMAP_ALLOC (NULL);
4779 nw->n_cands = 0;
4780 nw->n_regs = 0;
4781 nw->cand_use_cost = 0;
4782 nw->cand_cost = 0;
4783 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4784 nw->cost = 0;
4786 return nw;
4789 /* Free memory occupied by the set IVS. */
4791 static void
4792 iv_ca_free (struct iv_ca **ivs)
4794 free ((*ivs)->cand_for_use);
4795 free ((*ivs)->n_cand_uses);
4796 BITMAP_FREE ((*ivs)->cands);
4797 free ((*ivs)->n_invariant_uses);
4798 free (*ivs);
4799 *ivs = NULL;
4802 /* Dumps IVS to FILE. */
4804 static void
4805 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4807 const char *pref = " invariants ";
4808 unsigned i;
4810 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4811 bitmap_print (file, ivs->cands, " candidates ","\n");
4813 for (i = 1; i <= data->max_inv_id; i++)
4814 if (ivs->n_invariant_uses[i])
4816 fprintf (file, "%s%d", pref, i);
4817 pref = ", ";
4819 fprintf (file, "\n");
4822 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4823 new set, and store differences in DELTA. Number of induction variables
4824 in the new set is stored to N_IVS. */
4826 static unsigned
4827 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4828 struct iv_cand *cand, struct iv_ca_delta **delta,
4829 unsigned *n_ivs)
4831 unsigned i, cost;
4832 struct iv_use *use;
4833 struct cost_pair *old_cp, *new_cp;
4835 *delta = NULL;
4836 for (i = 0; i < ivs->upto; i++)
4838 use = iv_use (data, i);
4839 old_cp = iv_ca_cand_for_use (ivs, use);
4841 if (old_cp
4842 && old_cp->cand == cand)
4843 continue;
4845 new_cp = get_use_iv_cost (data, use, cand);
4846 if (!new_cp)
4847 continue;
4849 if (!iv_ca_has_deps (ivs, new_cp))
4850 continue;
4852 if (!cheaper_cost_pair (new_cp, old_cp))
4853 continue;
4855 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4858 iv_ca_delta_commit (data, ivs, *delta, true);
4859 cost = iv_ca_cost (ivs);
4860 if (n_ivs)
4861 *n_ivs = iv_ca_n_cands (ivs);
4862 iv_ca_delta_commit (data, ivs, *delta, false);
4864 return cost;
4867 /* Try narrowing set IVS by removing CAND. Return the cost of
4868 the new set and store the differences in DELTA. */
4870 static unsigned
4871 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4872 struct iv_cand *cand, struct iv_ca_delta **delta)
4874 unsigned i, ci;
4875 struct iv_use *use;
4876 struct cost_pair *old_cp, *new_cp, *cp;
4877 bitmap_iterator bi;
4878 struct iv_cand *cnd;
4879 unsigned cost;
4881 *delta = NULL;
4882 for (i = 0; i < n_iv_uses (data); i++)
4884 use = iv_use (data, i);
4886 old_cp = iv_ca_cand_for_use (ivs, use);
4887 if (old_cp->cand != cand)
4888 continue;
4890 new_cp = NULL;
4892 if (data->consider_all_candidates)
4894 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4896 if (ci == cand->id)
4897 continue;
4899 cnd = iv_cand (data, ci);
4901 cp = get_use_iv_cost (data, use, cnd);
4902 if (!cp)
4903 continue;
4904 if (!iv_ca_has_deps (ivs, cp))
4905 continue;
4907 if (!cheaper_cost_pair (cp, new_cp))
4908 continue;
4910 new_cp = cp;
4913 else
4915 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4917 if (ci == cand->id)
4918 continue;
4920 cnd = iv_cand (data, ci);
4922 cp = get_use_iv_cost (data, use, cnd);
4923 if (!cp)
4924 continue;
4925 if (!iv_ca_has_deps (ivs, cp))
4926 continue;
4928 if (!cheaper_cost_pair (cp, new_cp))
4929 continue;
4931 new_cp = cp;
4935 if (!new_cp)
4937 iv_ca_delta_free (delta);
4938 return INFTY;
4941 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4944 iv_ca_delta_commit (data, ivs, *delta, true);
4945 cost = iv_ca_cost (ivs);
4946 iv_ca_delta_commit (data, ivs, *delta, false);
4948 return cost;
4951 /* Try optimizing the set of candidates IVS by removing candidates different
4952 from to EXCEPT_CAND from it. Return cost of the new set, and store
4953 differences in DELTA. */
4955 static unsigned
4956 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4957 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4959 bitmap_iterator bi;
4960 struct iv_ca_delta *act_delta, *best_delta;
4961 unsigned i, best_cost, acost;
4962 struct iv_cand *cand;
4964 best_delta = NULL;
4965 best_cost = iv_ca_cost (ivs);
4967 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4969 cand = iv_cand (data, i);
4971 if (cand == except_cand)
4972 continue;
4974 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4976 if (acost < best_cost)
4978 best_cost = acost;
4979 iv_ca_delta_free (&best_delta);
4980 best_delta = act_delta;
4982 else
4983 iv_ca_delta_free (&act_delta);
4986 if (!best_delta)
4988 *delta = NULL;
4989 return best_cost;
4992 /* Recurse to possibly remove other unnecessary ivs. */
4993 iv_ca_delta_commit (data, ivs, best_delta, true);
4994 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4995 iv_ca_delta_commit (data, ivs, best_delta, false);
4996 *delta = iv_ca_delta_join (best_delta, *delta);
4997 return best_cost;
5000 /* Tries to extend the sets IVS in the best possible way in order
5001 to express the USE. */
5003 static bool
5004 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5005 struct iv_use *use)
5007 unsigned best_cost, act_cost;
5008 unsigned i;
5009 bitmap_iterator bi;
5010 struct iv_cand *cand;
5011 struct iv_ca_delta *best_delta = NULL, *act_delta;
5012 struct cost_pair *cp;
5014 iv_ca_add_use (data, ivs, use);
5015 best_cost = iv_ca_cost (ivs);
5017 cp = iv_ca_cand_for_use (ivs, use);
5018 if (cp)
5020 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5021 iv_ca_set_no_cp (data, ivs, use);
5024 /* First try important candidates. Only if it fails, try the specific ones.
5025 Rationale -- in loops with many variables the best choice often is to use
5026 just one generic biv. If we added here many ivs specific to the uses,
5027 the optimization algorithm later would be likely to get stuck in a local
5028 minimum, thus causing us to create too many ivs. The approach from
5029 few ivs to more seems more likely to be successful -- starting from few
5030 ivs, replacing an expensive use by a specific iv should always be a
5031 win. */
5032 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5034 cand = iv_cand (data, i);
5036 if (iv_ca_cand_used_p (ivs, cand))
5037 continue;
5039 cp = get_use_iv_cost (data, use, cand);
5040 if (!cp)
5041 continue;
5043 iv_ca_set_cp (data, ivs, use, cp);
5044 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5045 iv_ca_set_no_cp (data, ivs, use);
5046 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5048 if (act_cost < best_cost)
5050 best_cost = act_cost;
5052 iv_ca_delta_free (&best_delta);
5053 best_delta = act_delta;
5055 else
5056 iv_ca_delta_free (&act_delta);
5059 if (best_cost == INFTY)
5061 for (i = 0; i < use->n_map_members; i++)
5063 cp = use->cost_map + i;
5064 cand = cp->cand;
5065 if (!cand)
5066 continue;
5068 /* Already tried this. */
5069 if (cand->important)
5070 continue;
5072 if (iv_ca_cand_used_p (ivs, cand))
5073 continue;
5075 act_delta = NULL;
5076 iv_ca_set_cp (data, ivs, use, cp);
5077 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5078 iv_ca_set_no_cp (data, ivs, use);
5079 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5080 cp, act_delta);
5082 if (act_cost < best_cost)
5084 best_cost = act_cost;
5086 if (best_delta)
5087 iv_ca_delta_free (&best_delta);
5088 best_delta = act_delta;
5090 else
5091 iv_ca_delta_free (&act_delta);
5095 iv_ca_delta_commit (data, ivs, best_delta, true);
5096 iv_ca_delta_free (&best_delta);
5098 return (best_cost != INFTY);
5101 /* Finds an initial assignment of candidates to uses. */
5103 static struct iv_ca *
5104 get_initial_solution (struct ivopts_data *data)
5106 struct iv_ca *ivs = iv_ca_new (data);
5107 unsigned i;
5109 for (i = 0; i < n_iv_uses (data); i++)
5110 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5112 iv_ca_free (&ivs);
5113 return NULL;
5116 return ivs;
5119 /* Tries to improve set of induction variables IVS. */
5121 static bool
5122 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5124 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5125 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5126 struct iv_cand *cand;
5128 /* Try extending the set of induction variables by one. */
5129 for (i = 0; i < n_iv_cands (data); i++)
5131 cand = iv_cand (data, i);
5133 if (iv_ca_cand_used_p (ivs, cand))
5134 continue;
5136 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5137 if (!act_delta)
5138 continue;
5140 /* If we successfully added the candidate and the set is small enough,
5141 try optimizing it by removing other candidates. */
5142 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5144 iv_ca_delta_commit (data, ivs, act_delta, true);
5145 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5146 iv_ca_delta_commit (data, ivs, act_delta, false);
5147 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5150 if (acost < best_cost)
5152 best_cost = acost;
5153 iv_ca_delta_free (&best_delta);
5154 best_delta = act_delta;
5156 else
5157 iv_ca_delta_free (&act_delta);
5160 if (!best_delta)
5162 /* Try removing the candidates from the set instead. */
5163 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5165 /* Nothing more we can do. */
5166 if (!best_delta)
5167 return false;
5170 iv_ca_delta_commit (data, ivs, best_delta, true);
5171 gcc_assert (best_cost == iv_ca_cost (ivs));
5172 iv_ca_delta_free (&best_delta);
5173 return true;
5176 /* Attempts to find the optimal set of induction variables. We do simple
5177 greedy heuristic -- we try to replace at most one candidate in the selected
5178 solution and remove the unused ivs while this improves the cost. */
5180 static struct iv_ca *
5181 find_optimal_iv_set (struct ivopts_data *data)
5183 unsigned i;
5184 struct iv_ca *set;
5185 struct iv_use *use;
5187 /* Get the initial solution. */
5188 set = get_initial_solution (data);
5189 if (!set)
5191 if (dump_file && (dump_flags & TDF_DETAILS))
5192 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5193 return NULL;
5196 if (dump_file && (dump_flags & TDF_DETAILS))
5198 fprintf (dump_file, "Initial set of candidates:\n");
5199 iv_ca_dump (data, dump_file, set);
5202 while (try_improve_iv_set (data, set))
5204 if (dump_file && (dump_flags & TDF_DETAILS))
5206 fprintf (dump_file, "Improved to:\n");
5207 iv_ca_dump (data, dump_file, set);
5211 if (dump_file && (dump_flags & TDF_DETAILS))
5212 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5214 for (i = 0; i < n_iv_uses (data); i++)
5216 use = iv_use (data, i);
5217 use->selected = iv_ca_cand_for_use (set, use)->cand;
5220 return set;
5223 /* Creates a new induction variable corresponding to CAND. */
5225 static void
5226 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5228 block_stmt_iterator incr_pos;
5229 tree base;
5230 bool after = false;
5232 if (!cand->iv)
5233 return;
5235 switch (cand->pos)
5237 case IP_NORMAL:
5238 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5239 break;
5241 case IP_END:
5242 incr_pos = bsi_last (ip_end_pos (data->current_loop));
5243 after = true;
5244 break;
5246 case IP_ORIGINAL:
5247 /* Mark that the iv is preserved. */
5248 name_info (data, cand->var_before)->preserve_biv = true;
5249 name_info (data, cand->var_after)->preserve_biv = true;
5251 /* Rewrite the increment so that it uses var_before directly. */
5252 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5254 return;
5257 gimple_add_tmp_var (cand->var_before);
5258 add_referenced_tmp_var (cand->var_before);
5260 base = unshare_expr (cand->iv->base);
5262 create_iv (base, unshare_expr (cand->iv->step),
5263 cand->var_before, data->current_loop,
5264 &incr_pos, after, &cand->var_before, &cand->var_after);
5267 /* Creates new induction variables described in SET. */
5269 static void
5270 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5272 unsigned i;
5273 struct iv_cand *cand;
5274 bitmap_iterator bi;
5276 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5278 cand = iv_cand (data, i);
5279 create_new_iv (data, cand);
5283 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5284 is true, remove also the ssa name defined by the statement. */
5286 static void
5287 remove_statement (tree stmt, bool including_defined_name)
5289 if (TREE_CODE (stmt) == PHI_NODE)
5291 if (!including_defined_name)
5293 /* Prevent the ssa name defined by the statement from being removed. */
5294 SET_PHI_RESULT (stmt, NULL);
5296 remove_phi_node (stmt, NULL_TREE);
5298 else
5300 block_stmt_iterator bsi = bsi_for_stmt (stmt);
5302 bsi_remove (&bsi);
5306 /* Rewrites USE (definition of iv used in a nonlinear expression)
5307 using candidate CAND. */
5309 static void
5310 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5311 struct iv_use *use, struct iv_cand *cand)
5313 tree comp;
5314 tree op, stmts, tgt, ass;
5315 block_stmt_iterator bsi, pbsi;
5317 /* An important special case -- if we are asked to express value of
5318 the original iv by itself, just exit; there is no need to
5319 introduce a new computation (that might also need casting the
5320 variable to unsigned and back). */
5321 if (cand->pos == IP_ORIGINAL
5322 && TREE_CODE (use->stmt) == MODIFY_EXPR
5323 && TREE_OPERAND (use->stmt, 0) == cand->var_after)
5325 op = TREE_OPERAND (use->stmt, 1);
5327 /* Be a bit careful. In case variable is expressed in some
5328 complicated way, rewrite it so that we may get rid of this
5329 complicated expression. */
5330 if ((TREE_CODE (op) == PLUS_EXPR
5331 || TREE_CODE (op) == MINUS_EXPR)
5332 && TREE_OPERAND (op, 0) == cand->var_before
5333 && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
5334 return;
5337 comp = get_computation (data->current_loop, use, cand);
5338 switch (TREE_CODE (use->stmt))
5340 case PHI_NODE:
5341 tgt = PHI_RESULT (use->stmt);
5343 /* If we should keep the biv, do not replace it. */
5344 if (name_info (data, tgt)->preserve_biv)
5345 return;
5347 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5348 while (!bsi_end_p (pbsi)
5349 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5351 bsi = pbsi;
5352 bsi_next (&pbsi);
5354 break;
5356 case MODIFY_EXPR:
5357 tgt = TREE_OPERAND (use->stmt, 0);
5358 bsi = bsi_for_stmt (use->stmt);
5359 break;
5361 default:
5362 gcc_unreachable ();
5365 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5367 if (TREE_CODE (use->stmt) == PHI_NODE)
5369 if (stmts)
5370 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5371 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5372 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5373 remove_statement (use->stmt, false);
5374 SSA_NAME_DEF_STMT (tgt) = ass;
5376 else
5378 if (stmts)
5379 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5380 TREE_OPERAND (use->stmt, 1) = op;
5384 /* Replaces ssa name in index IDX by its basic variable. Callback for
5385 for_each_index. */
5387 static bool
5388 idx_remove_ssa_names (tree base, tree *idx,
5389 void *data ATTRIBUTE_UNUSED)
5391 tree *op;
5393 if (TREE_CODE (*idx) == SSA_NAME)
5394 *idx = SSA_NAME_VAR (*idx);
5396 if (TREE_CODE (base) == ARRAY_REF)
5398 op = &TREE_OPERAND (base, 2);
5399 if (*op
5400 && TREE_CODE (*op) == SSA_NAME)
5401 *op = SSA_NAME_VAR (*op);
5402 op = &TREE_OPERAND (base, 3);
5403 if (*op
5404 && TREE_CODE (*op) == SSA_NAME)
5405 *op = SSA_NAME_VAR (*op);
5408 return true;
5411 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5413 static tree
5414 unshare_and_remove_ssa_names (tree ref)
5416 ref = unshare_expr (ref);
5417 for_each_index (&ref, idx_remove_ssa_names, NULL);
5419 return ref;
5422 /* Extract the alias analysis info for the memory reference REF. There are
5423 several ways how this information may be stored and what precisely is
5424 its semantics depending on the type of the reference, but there always is
5425 somewhere hidden one _DECL node that is used to determine the set of
5426 virtual operands for the reference. The code below deciphers this jungle
5427 and extracts this single useful piece of information. */
5429 static tree
5430 get_ref_tag (tree ref)
5432 tree var = get_base_address (ref);
5433 tree tag;
5435 if (!var)
5436 return NULL_TREE;
5438 if (TREE_CODE (var) == INDIRECT_REF)
5439 var = TREE_OPERAND (var, 0);
5440 if (TREE_CODE (var) == SSA_NAME)
5442 if (SSA_NAME_PTR_INFO (var))
5444 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5445 if (tag)
5446 return tag;
5449 var = SSA_NAME_VAR (var);
5452 if (DECL_P (var))
5454 tag = var_ann (var)->type_mem_tag;
5455 if (tag)
5456 return tag;
5458 return var;
5461 return NULL_TREE;
5464 /* Copies the reference information from OLD_REF to NEW_REF. */
5466 static void
5467 copy_ref_info (tree new_ref, tree old_ref)
5469 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5470 copy_mem_ref_info (new_ref, old_ref);
5471 else
5473 TMR_TAG (new_ref) = get_ref_tag (old_ref);
5474 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5478 /* Rewrites USE (address that is an iv) using candidate CAND. */
5480 static void
5481 rewrite_use_address (struct ivopts_data *data,
5482 struct iv_use *use, struct iv_cand *cand)
5484 struct affine_tree_combination aff;
5485 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5486 tree ref;
5488 get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5489 unshare_aff_combination (&aff);
5491 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5492 copy_ref_info (ref, *use->op_p);
5493 *use->op_p = ref;
5496 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5497 candidate CAND. */
5499 static void
5500 rewrite_use_compare (struct ivopts_data *data,
5501 struct iv_use *use, struct iv_cand *cand)
5503 tree comp;
5504 tree *op_p, cond, op, stmts, bound;
5505 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5506 enum tree_code compare;
5507 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5509 bound = cp->value;
5510 if (bound)
5512 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5513 tree var_type = TREE_TYPE (var);
5515 compare = iv_elimination_compare (data, use);
5516 bound = fold_convert (var_type, bound);
5517 op = force_gimple_operand (unshare_expr (bound), &stmts,
5518 true, NULL_TREE);
5520 if (stmts)
5521 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5523 *use->op_p = build2 (compare, boolean_type_node, var, op);
5524 update_stmt (use->stmt);
5525 return;
5528 /* The induction variable elimination failed; just express the original
5529 giv. */
5530 comp = get_computation (data->current_loop, use, cand);
5532 cond = *use->op_p;
5533 op_p = &TREE_OPERAND (cond, 0);
5534 if (TREE_CODE (*op_p) != SSA_NAME
5535 || zero_p (get_iv (data, *op_p)->step))
5536 op_p = &TREE_OPERAND (cond, 1);
5538 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5539 if (stmts)
5540 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5542 *op_p = op;
5545 /* Ensure that operand *OP_P may be used at the end of EXIT without
5546 violating loop closed ssa form. */
5548 static void
5549 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5551 basic_block def_bb;
5552 struct loop *def_loop;
5553 tree phi, use;
5555 use = USE_FROM_PTR (op_p);
5556 if (TREE_CODE (use) != SSA_NAME)
5557 return;
5559 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5560 if (!def_bb)
5561 return;
5563 def_loop = def_bb->loop_father;
5564 if (flow_bb_inside_loop_p (def_loop, exit->dest))
5565 return;
5567 /* Try finding a phi node that copies the value out of the loop. */
5568 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5569 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5570 break;
5572 if (!phi)
5574 /* Create such a phi node. */
5575 tree new_name = duplicate_ssa_name (use, NULL);
5577 phi = create_phi_node (new_name, exit->dest);
5578 SSA_NAME_DEF_STMT (new_name) = phi;
5579 add_phi_arg (phi, use, exit);
5582 SET_USE (op_p, PHI_RESULT (phi));
5585 /* Ensure that operands of STMT may be used at the end of EXIT without
5586 violating loop closed ssa form. */
5588 static void
5589 protect_loop_closed_ssa_form (edge exit, tree stmt)
5591 ssa_op_iter iter;
5592 use_operand_p use_p;
5594 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5595 protect_loop_closed_ssa_form_use (exit, use_p);
5598 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
5599 so that they are emitted on the correct place, and so that the loop closed
5600 ssa form is preserved. */
5602 static void
5603 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5605 tree_stmt_iterator tsi;
5606 block_stmt_iterator bsi;
5607 tree phi, stmt, def, next;
5609 if (!single_pred_p (exit->dest))
5610 split_loop_exit_edge (exit);
5612 /* Ensure there is label in exit->dest, so that we can
5613 insert after it. */
5614 tree_block_label (exit->dest);
5615 bsi = bsi_after_labels (exit->dest);
5617 if (TREE_CODE (stmts) == STATEMENT_LIST)
5619 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5621 bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
5622 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5625 else
5627 bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
5628 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5631 if (!op)
5632 return;
5634 for (phi = phi_nodes (exit->dest); phi; phi = next)
5636 next = PHI_CHAIN (phi);
5638 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5640 def = PHI_RESULT (phi);
5641 remove_statement (phi, false);
5642 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5643 def, op);
5644 SSA_NAME_DEF_STMT (def) = stmt;
5645 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5650 /* Rewrites the final value of USE (that is only needed outside of the loop)
5651 using candidate CAND. */
5653 static void
5654 rewrite_use_outer (struct ivopts_data *data,
5655 struct iv_use *use, struct iv_cand *cand)
5657 edge exit;
5658 tree value, op, stmts, tgt;
5659 tree phi;
5661 switch (TREE_CODE (use->stmt))
5663 case PHI_NODE:
5664 tgt = PHI_RESULT (use->stmt);
5665 break;
5666 case MODIFY_EXPR:
5667 tgt = TREE_OPERAND (use->stmt, 0);
5668 break;
5669 default:
5670 gcc_unreachable ();
5673 exit = single_dom_exit (data->current_loop);
5675 if (exit)
5677 if (!cand->iv)
5679 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5680 value = unshare_expr (cp->value);
5682 else
5683 value = get_computation_at (data->current_loop,
5684 use, cand, last_stmt (exit->src));
5686 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5688 /* If we will preserve the iv anyway and we would need to perform
5689 some computation to replace the final value, do nothing. */
5690 if (stmts && name_info (data, tgt)->preserve_biv)
5691 return;
5693 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5695 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5697 if (USE_FROM_PTR (use_p) == tgt)
5698 SET_USE (use_p, op);
5701 if (stmts)
5702 compute_phi_arg_on_exit (exit, stmts, op);
5704 /* Enable removal of the statement. We cannot remove it directly,
5705 since we may still need the aliasing information attached to the
5706 ssa name defined by it. */
5707 name_info (data, tgt)->iv->have_use_for = false;
5708 return;
5711 /* If the variable is going to be preserved anyway, there is nothing to
5712 do. */
5713 if (name_info (data, tgt)->preserve_biv)
5714 return;
5716 /* Otherwise we just need to compute the iv. */
5717 rewrite_use_nonlinear_expr (data, use, cand);
5720 /* Rewrites USE using candidate CAND. */
5722 static void
5723 rewrite_use (struct ivopts_data *data,
5724 struct iv_use *use, struct iv_cand *cand)
5726 switch (use->type)
5728 case USE_NONLINEAR_EXPR:
5729 rewrite_use_nonlinear_expr (data, use, cand);
5730 break;
5732 case USE_OUTER:
5733 rewrite_use_outer (data, use, cand);
5734 break;
5736 case USE_ADDRESS:
5737 rewrite_use_address (data, use, cand);
5738 break;
5740 case USE_COMPARE:
5741 rewrite_use_compare (data, use, cand);
5742 break;
5744 default:
5745 gcc_unreachable ();
5747 update_stmt (use->stmt);
5750 /* Rewrite the uses using the selected induction variables. */
5752 static void
5753 rewrite_uses (struct ivopts_data *data)
5755 unsigned i;
5756 struct iv_cand *cand;
5757 struct iv_use *use;
5759 for (i = 0; i < n_iv_uses (data); i++)
5761 use = iv_use (data, i);
5762 cand = use->selected;
5763 gcc_assert (cand);
5765 rewrite_use (data, use, cand);
5769 /* Removes the ivs that are not used after rewriting. */
5771 static void
5772 remove_unused_ivs (struct ivopts_data *data)
5774 unsigned j;
5775 bitmap_iterator bi;
5777 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5779 struct version_info *info;
5781 info = ver_info (data, j);
5782 if (info->iv
5783 && !zero_p (info->iv->step)
5784 && !info->inv_id
5785 && !info->iv->have_use_for
5786 && !info->preserve_biv)
5787 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5791 /* Frees data allocated by the optimization of a single loop. */
5793 static void
5794 free_loop_data (struct ivopts_data *data)
5796 unsigned i, j;
5797 bitmap_iterator bi;
5798 tree obj;
5800 htab_empty (data->niters);
5802 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5804 struct version_info *info;
5806 info = ver_info (data, i);
5807 if (info->iv)
5808 free (info->iv);
5809 info->iv = NULL;
5810 info->has_nonlin_use = false;
5811 info->preserve_biv = false;
5812 info->inv_id = 0;
5814 bitmap_clear (data->relevant);
5815 bitmap_clear (data->important_candidates);
5817 for (i = 0; i < n_iv_uses (data); i++)
5819 struct iv_use *use = iv_use (data, i);
5821 free (use->iv);
5822 BITMAP_FREE (use->related_cands);
5823 for (j = 0; j < use->n_map_members; j++)
5824 if (use->cost_map[j].depends_on)
5825 BITMAP_FREE (use->cost_map[j].depends_on);
5826 free (use->cost_map);
5827 free (use);
5829 VEC_truncate (iv_use_p, data->iv_uses, 0);
5831 for (i = 0; i < n_iv_cands (data); i++)
5833 struct iv_cand *cand = iv_cand (data, i);
5835 if (cand->iv)
5836 free (cand->iv);
5837 if (cand->depends_on)
5838 BITMAP_FREE (cand->depends_on);
5839 free (cand);
5841 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5843 if (data->version_info_size < num_ssa_names)
5845 data->version_info_size = 2 * num_ssa_names;
5846 free (data->version_info);
5847 data->version_info = xcalloc (data->version_info_size,
5848 sizeof (struct version_info));
5851 data->max_inv_id = 0;
5853 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5854 SET_DECL_RTL (obj, NULL_RTX);
5856 VEC_truncate (tree, decl_rtl_to_reset, 0);
5859 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5860 loop tree. */
5862 static void
5863 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5865 unsigned i;
5867 for (i = 1; i < loops->num; i++)
5868 if (loops->parray[i])
5870 free (loops->parray[i]->aux);
5871 loops->parray[i]->aux = NULL;
5874 free_loop_data (data);
5875 free (data->version_info);
5876 BITMAP_FREE (data->relevant);
5877 BITMAP_FREE (data->important_candidates);
5878 htab_delete (data->niters);
5880 VEC_free (tree, heap, decl_rtl_to_reset);
5881 VEC_free (iv_use_p, heap, data->iv_uses);
5882 VEC_free (iv_cand_p, heap, data->iv_candidates);
5885 /* Optimizes the LOOP. Returns true if anything changed. */
5887 static bool
5888 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5890 bool changed = false;
5891 struct iv_ca *iv_ca;
5892 edge exit;
5894 data->current_loop = loop;
5896 if (dump_file && (dump_flags & TDF_DETAILS))
5898 fprintf (dump_file, "Processing loop %d\n", loop->num);
5900 exit = single_dom_exit (loop);
5901 if (exit)
5903 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5904 exit->src->index, exit->dest->index);
5905 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5906 fprintf (dump_file, "\n");
5909 fprintf (dump_file, "\n");
5912 /* For each ssa name determines whether it behaves as an induction variable
5913 in some loop. */
5914 if (!find_induction_variables (data))
5915 goto finish;
5917 /* Finds interesting uses (item 1). */
5918 find_interesting_uses (data);
5919 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5920 goto finish;
5922 /* Finds candidates for the induction variables (item 2). */
5923 find_iv_candidates (data);
5925 /* Calculates the costs (item 3, part 1). */
5926 determine_use_iv_costs (data);
5927 determine_iv_costs (data);
5928 determine_set_costs (data);
5930 /* Find the optimal set of induction variables (item 3, part 2). */
5931 iv_ca = find_optimal_iv_set (data);
5932 if (!iv_ca)
5933 goto finish;
5934 changed = true;
5936 /* Create the new induction variables (item 4, part 1). */
5937 create_new_ivs (data, iv_ca);
5938 iv_ca_free (&iv_ca);
5940 /* Rewrite the uses (item 4, part 2). */
5941 rewrite_uses (data);
5943 /* Remove the ivs that are unused after rewriting. */
5944 remove_unused_ivs (data);
5946 /* We have changed the structure of induction variables; it might happen
5947 that definitions in the scev database refer to some of them that were
5948 eliminated. */
5949 scev_reset ();
5951 finish:
5952 free_loop_data (data);
5954 return changed;
5957 /* Main entry point. Optimizes induction variables in LOOPS. */
5959 void
5960 tree_ssa_iv_optimize (struct loops *loops)
5962 struct loop *loop;
5963 struct ivopts_data data;
5965 tree_ssa_iv_optimize_init (loops, &data);
5967 /* Optimize the loops starting with the innermost ones. */
5968 loop = loops->tree_root;
5969 while (loop->inner)
5970 loop = loop->inner;
5972 /* Scan the loops, inner ones first. */
5973 while (loop != loops->tree_root)
5975 if (dump_file && (dump_flags & TDF_DETAILS))
5976 flow_loop_dump (loop, dump_file, NULL, 1);
5978 tree_ssa_iv_optimize_loop (&data, loop);
5980 if (loop->next)
5982 loop = loop->next;
5983 while (loop->inner)
5984 loop = loop->inner;
5986 else
5987 loop = loop->outer;
5990 tree_ssa_iv_optimize_finalize (loops, &data);