2005-06-07 Adrian Straetling <straetling@de.ibm.com>
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blob716d113e75ecd7be15c2bb2ba4653177d0d5f27e
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
91 #include "langhooks.h"
93 /* The infinite cost. */
94 #define INFTY 10000000
96 /* The expected number of loop iterations. TODO -- use profiling instead of
97 this. */
98 #define AVG_LOOP_NITER(LOOP) 5
101 /* Representation of the induction variable. */
102 struct iv
104 tree base; /* Initial value of the iv. */
105 tree base_object; /* A memory object to that the induction variable points. */
106 tree step; /* Step of the iv (constant only). */
107 tree ssa_name; /* The ssa name with the value. */
108 bool biv_p; /* Is it a biv? */
109 bool have_use_for; /* Do we already have a use for it? */
110 unsigned use_id; /* The identifier in the use if it is the case. */
113 /* Per-ssa version information (induction variable descriptions, etc.). */
114 struct version_info
116 tree name; /* The ssa name. */
117 struct iv *iv; /* Induction variable description. */
118 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id; /* Id of an invariant. */
121 bool preserve_biv; /* For the original biv, whether to preserve it. */
124 /* Information attached to loop. */
125 struct loop_data
127 unsigned regs_used; /* Number of registers used. */
130 /* Types of uses. */
131 enum use_type
133 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
134 USE_OUTER, /* The induction variable is used outside the loop. */
135 USE_ADDRESS, /* Use in an address. */
136 USE_COMPARE /* Use is a compare. */
139 /* The candidate - cost pair. */
140 struct cost_pair
142 struct iv_cand *cand; /* The candidate. */
143 unsigned cost; /* The cost. */
144 bitmap depends_on; /* The list of invariants that have to be
145 preserved. */
146 tree value; /* For final value elimination, the expression for
147 the final value of the iv. For iv elimination,
148 the new bound to compare with. */
151 /* Use. */
152 struct iv_use
154 unsigned id; /* The id of the use. */
155 enum use_type type; /* Type of the use. */
156 struct iv *iv; /* The induction variable it is based on. */
157 tree stmt; /* Statement in that it occurs. */
158 tree *op_p; /* The place where it occurs. */
159 bitmap related_cands; /* The set of "related" iv candidates, plus the common
160 important ones. */
162 unsigned n_map_members; /* Number of candidates in the cost_map list. */
163 struct cost_pair *cost_map;
164 /* The costs wrto the iv candidates. */
166 struct iv_cand *selected;
167 /* The selected candidate. */
170 /* The position where the iv is computed. */
171 enum iv_position
173 IP_NORMAL, /* At the end, just before the exit condition. */
174 IP_END, /* At the end of the latch block. */
175 IP_ORIGINAL /* The original biv. */
178 /* The induction variable candidate. */
179 struct iv_cand
181 unsigned id; /* The number of the candidate. */
182 bool important; /* Whether this is an "important" candidate, i.e. such
183 that it should be considered by all uses. */
184 enum iv_position pos; /* Where it is computed. */
185 tree incremented_at; /* For original biv, the statement where it is
186 incremented. */
187 tree var_before; /* The variable used for it before increment. */
188 tree var_after; /* The variable used for it after increment. */
189 struct iv *iv; /* The value of the candidate. NULL for
190 "pseudocandidate" used to indicate the possibility
191 to replace the final value of an iv by direct
192 computation of the value. */
193 unsigned cost; /* Cost of the candidate. */
194 bitmap depends_on; /* The list of invariants that are used in step of the
195 biv. */
198 /* The data used by the induction variable optimizations. */
200 typedef struct iv_use *iv_use_p;
201 DEF_VEC_P(iv_use_p);
202 DEF_VEC_ALLOC_P(iv_use_p,heap);
204 typedef struct iv_cand *iv_cand_p;
205 DEF_VEC_P(iv_cand_p);
206 DEF_VEC_ALLOC_P(iv_cand_p,heap);
208 struct ivopts_data
210 /* The currently optimized loop. */
211 struct loop *current_loop;
213 /* Numbers of iterations for all exits of the current loop. */
214 htab_t niters;
216 /* The size of version_info array allocated. */
217 unsigned version_info_size;
219 /* The array of information for the ssa names. */
220 struct version_info *version_info;
222 /* The bitmap of indices in version_info whose value was changed. */
223 bitmap relevant;
225 /* The maximum invariant id. */
226 unsigned max_inv_id;
228 /* The uses of induction variables. */
229 VEC(iv_use_p,heap) *iv_uses;
231 /* The candidates. */
232 VEC(iv_cand_p,heap) *iv_candidates;
234 /* A bitmap of important candidates. */
235 bitmap important_candidates;
237 /* Whether to consider just related and important candidates when replacing a
238 use. */
239 bool consider_all_candidates;
242 /* An assignment of iv candidates to uses. */
244 struct iv_ca
246 /* The number of uses covered by the assignment. */
247 unsigned upto;
249 /* Number of uses that cannot be expressed by the candidates in the set. */
250 unsigned bad_uses;
252 /* Candidate assigned to a use, together with the related costs. */
253 struct cost_pair **cand_for_use;
255 /* Number of times each candidate is used. */
256 unsigned *n_cand_uses;
258 /* The candidates used. */
259 bitmap cands;
261 /* The number of candidates in the set. */
262 unsigned n_cands;
264 /* Total number of registers needed. */
265 unsigned n_regs;
267 /* Total cost of expressing uses. */
268 unsigned cand_use_cost;
270 /* Total cost of candidates. */
271 unsigned cand_cost;
273 /* Number of times each invariant is used. */
274 unsigned *n_invariant_uses;
276 /* Total cost of the assignment. */
277 unsigned cost;
280 /* Difference of two iv candidate assignments. */
282 struct iv_ca_delta
284 /* Changed use. */
285 struct iv_use *use;
287 /* An old assignment (for rollback purposes). */
288 struct cost_pair *old_cp;
290 /* A new assignment. */
291 struct cost_pair *new_cp;
293 /* Next change in the list. */
294 struct iv_ca_delta *next_change;
297 /* Bound on number of candidates below that all candidates are considered. */
299 #define CONSIDER_ALL_CANDIDATES_BOUND \
300 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
302 /* If there are more iv occurrences, we just give up (it is quite unlikely that
303 optimizing such a loop would help, and it would take ages). */
305 #define MAX_CONSIDERED_USES \
306 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
308 /* If there are at most this number of ivs in the set, try removing unnecessary
309 ivs from the set always. */
311 #define ALWAYS_PRUNE_CAND_SET_BOUND \
312 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
314 /* The list of trees for that the decl_rtl field must be reset is stored
315 here. */
317 static VEC(tree,heap) *decl_rtl_to_reset;
319 /* Number of uses recorded in DATA. */
321 static inline unsigned
322 n_iv_uses (struct ivopts_data *data)
324 return VEC_length (iv_use_p, data->iv_uses);
327 /* Ith use recorded in DATA. */
329 static inline struct iv_use *
330 iv_use (struct ivopts_data *data, unsigned i)
332 return VEC_index (iv_use_p, data->iv_uses, i);
335 /* Number of candidates recorded in DATA. */
337 static inline unsigned
338 n_iv_cands (struct ivopts_data *data)
340 return VEC_length (iv_cand_p, data->iv_candidates);
343 /* Ith candidate recorded in DATA. */
345 static inline struct iv_cand *
346 iv_cand (struct ivopts_data *data, unsigned i)
348 return VEC_index (iv_cand_p, data->iv_candidates, i);
351 /* The data for LOOP. */
353 static inline struct loop_data *
354 loop_data (struct loop *loop)
356 return loop->aux;
359 /* The single loop exit if it dominates the latch, NULL otherwise. */
361 static edge
362 single_dom_exit (struct loop *loop)
364 edge exit = loop->single_exit;
366 if (!exit)
367 return NULL;
369 if (!just_once_each_iteration_p (loop, exit->src))
370 return NULL;
372 return exit;
375 /* Dumps information about the induction variable IV to FILE. */
377 extern void dump_iv (FILE *, struct iv *);
378 void
379 dump_iv (FILE *file, struct iv *iv)
381 if (iv->ssa_name)
383 fprintf (file, "ssa name ");
384 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
385 fprintf (file, "\n");
388 fprintf (file, " type ");
389 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
390 fprintf (file, "\n");
392 if (iv->step)
394 fprintf (file, " base ");
395 print_generic_expr (file, iv->base, TDF_SLIM);
396 fprintf (file, "\n");
398 fprintf (file, " step ");
399 print_generic_expr (file, iv->step, TDF_SLIM);
400 fprintf (file, "\n");
402 else
404 fprintf (file, " invariant ");
405 print_generic_expr (file, iv->base, TDF_SLIM);
406 fprintf (file, "\n");
409 if (iv->base_object)
411 fprintf (file, " base object ");
412 print_generic_expr (file, iv->base_object, TDF_SLIM);
413 fprintf (file, "\n");
416 if (iv->biv_p)
417 fprintf (file, " is a biv\n");
420 /* Dumps information about the USE to FILE. */
422 extern void dump_use (FILE *, struct iv_use *);
423 void
424 dump_use (FILE *file, struct iv_use *use)
426 fprintf (file, "use %d\n", use->id);
428 switch (use->type)
430 case USE_NONLINEAR_EXPR:
431 fprintf (file, " generic\n");
432 break;
434 case USE_OUTER:
435 fprintf (file, " outside\n");
436 break;
438 case USE_ADDRESS:
439 fprintf (file, " address\n");
440 break;
442 case USE_COMPARE:
443 fprintf (file, " compare\n");
444 break;
446 default:
447 gcc_unreachable ();
450 fprintf (file, " in statement ");
451 print_generic_expr (file, use->stmt, TDF_SLIM);
452 fprintf (file, "\n");
454 fprintf (file, " at position ");
455 if (use->op_p)
456 print_generic_expr (file, *use->op_p, TDF_SLIM);
457 fprintf (file, "\n");
459 dump_iv (file, use->iv);
461 if (use->related_cands)
463 fprintf (file, " related candidates ");
464 dump_bitmap (file, use->related_cands);
468 /* Dumps information about the uses to FILE. */
470 extern void dump_uses (FILE *, struct ivopts_data *);
471 void
472 dump_uses (FILE *file, struct ivopts_data *data)
474 unsigned i;
475 struct iv_use *use;
477 for (i = 0; i < n_iv_uses (data); i++)
479 use = iv_use (data, i);
481 dump_use (file, use);
482 fprintf (file, "\n");
486 /* Dumps information about induction variable candidate CAND to FILE. */
488 extern void dump_cand (FILE *, struct iv_cand *);
489 void
490 dump_cand (FILE *file, struct iv_cand *cand)
492 struct iv *iv = cand->iv;
494 fprintf (file, "candidate %d%s\n",
495 cand->id, cand->important ? " (important)" : "");
497 if (cand->depends_on)
499 fprintf (file, " depends on ");
500 dump_bitmap (file, cand->depends_on);
503 if (!iv)
505 fprintf (file, " final value replacement\n");
506 return;
509 switch (cand->pos)
511 case IP_NORMAL:
512 fprintf (file, " incremented before exit test\n");
513 break;
515 case IP_END:
516 fprintf (file, " incremented at end\n");
517 break;
519 case IP_ORIGINAL:
520 fprintf (file, " original biv\n");
521 break;
524 dump_iv (file, iv);
527 /* Returns the info for ssa version VER. */
529 static inline struct version_info *
530 ver_info (struct ivopts_data *data, unsigned ver)
532 return data->version_info + ver;
535 /* Returns the info for ssa name NAME. */
537 static inline struct version_info *
538 name_info (struct ivopts_data *data, tree name)
540 return ver_info (data, SSA_NAME_VERSION (name));
543 /* Checks whether there exists number X such that X * B = A, counting modulo
544 2^BITS. */
546 static bool
547 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
548 HOST_WIDE_INT *x)
550 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
551 unsigned HOST_WIDE_INT inv, ex, val;
552 unsigned i;
554 a &= mask;
555 b &= mask;
557 /* First divide the whole equation by 2 as long as possible. */
558 while (!(a & 1) && !(b & 1))
560 a >>= 1;
561 b >>= 1;
562 bits--;
563 mask >>= 1;
566 if (!(b & 1))
568 /* If b is still even, a is odd and there is no such x. */
569 return false;
572 /* Find the inverse of b. We compute it as
573 b^(2^(bits - 1) - 1) (mod 2^bits). */
574 inv = 1;
575 ex = b;
576 for (i = 0; i < bits - 1; i++)
578 inv = (inv * ex) & mask;
579 ex = (ex * ex) & mask;
582 val = (a * inv) & mask;
584 gcc_assert (((val * b) & mask) == a);
586 if ((val >> (bits - 1)) & 1)
587 val |= ~mask;
589 *x = val;
591 return true;
594 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
595 emitted in LOOP. */
597 static bool
598 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
600 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
602 gcc_assert (bb);
604 if (sbb == loop->latch)
605 return true;
607 if (sbb != bb)
608 return false;
610 return stmt == last_stmt (bb);
613 /* Returns true if STMT if after the place where the original induction
614 variable CAND is incremented. */
616 static bool
617 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
619 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
620 basic_block stmt_bb = bb_for_stmt (stmt);
621 block_stmt_iterator bsi;
623 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
624 return false;
626 if (stmt_bb != cand_bb)
627 return true;
629 /* Scan the block from the end, since the original ivs are usually
630 incremented at the end of the loop body. */
631 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
633 if (bsi_stmt (bsi) == cand->incremented_at)
634 return false;
635 if (bsi_stmt (bsi) == stmt)
636 return true;
640 /* Returns true if STMT if after the place where the induction variable
641 CAND is incremented in LOOP. */
643 static bool
644 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
646 switch (cand->pos)
648 case IP_END:
649 return false;
651 case IP_NORMAL:
652 return stmt_after_ip_normal_pos (loop, stmt);
654 case IP_ORIGINAL:
655 return stmt_after_ip_original_pos (cand, stmt);
657 default:
658 gcc_unreachable ();
662 /* Element of the table in that we cache the numbers of iterations obtained
663 from exits of the loop. */
665 struct nfe_cache_elt
667 /* The edge for that the number of iterations is cached. */
668 edge exit;
670 /* True if the # of iterations was successfully determined. */
671 bool valid_p;
673 /* Description of # of iterations. */
674 struct tree_niter_desc niter;
677 /* Hash function for nfe_cache_elt E. */
679 static hashval_t
680 nfe_hash (const void *e)
682 const struct nfe_cache_elt *elt = e;
684 return htab_hash_pointer (elt->exit);
687 /* Equality function for nfe_cache_elt E1 and edge E2. */
689 static int
690 nfe_eq (const void *e1, const void *e2)
692 const struct nfe_cache_elt *elt1 = e1;
694 return elt1->exit == e2;
697 /* Returns structure describing number of iterations determined from
698 EXIT of DATA->current_loop, or NULL if something goes wrong. */
700 static struct tree_niter_desc *
701 niter_for_exit (struct ivopts_data *data, edge exit)
703 struct nfe_cache_elt *nfe_desc;
704 PTR *slot;
706 slot = htab_find_slot_with_hash (data->niters, exit,
707 htab_hash_pointer (exit),
708 INSERT);
710 if (!*slot)
712 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
713 nfe_desc->exit = exit;
714 nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
715 exit, &nfe_desc->niter);
716 *slot = nfe_desc;
718 else
719 nfe_desc = *slot;
721 if (!nfe_desc->valid_p)
722 return NULL;
724 return &nfe_desc->niter;
727 /* Returns structure describing number of iterations determined from
728 single dominating exit of DATA->current_loop, or NULL if something
729 goes wrong. */
731 static struct tree_niter_desc *
732 niter_for_single_dom_exit (struct ivopts_data *data)
734 edge exit = single_dom_exit (data->current_loop);
736 if (!exit)
737 return NULL;
739 return niter_for_exit (data, exit);
742 /* Initializes data structures used by the iv optimization pass, stored
743 in DATA. LOOPS is the loop tree. */
745 static void
746 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
748 unsigned i;
750 data->version_info_size = 2 * num_ssa_names;
751 data->version_info = xcalloc (data->version_info_size,
752 sizeof (struct version_info));
753 data->relevant = BITMAP_ALLOC (NULL);
754 data->important_candidates = BITMAP_ALLOC (NULL);
755 data->max_inv_id = 0;
756 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
758 for (i = 1; i < loops->num; i++)
759 if (loops->parray[i])
760 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
762 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
763 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
764 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
767 /* Returns a memory object to that EXPR points. In case we are able to
768 determine that it does not point to any such object, NULL is returned. */
770 static tree
771 determine_base_object (tree expr)
773 enum tree_code code = TREE_CODE (expr);
774 tree base, obj, op0, op1;
776 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
777 return NULL_TREE;
779 switch (code)
781 case INTEGER_CST:
782 return NULL_TREE;
784 case ADDR_EXPR:
785 obj = TREE_OPERAND (expr, 0);
786 base = get_base_address (obj);
788 if (!base)
789 return expr;
791 if (TREE_CODE (base) == INDIRECT_REF)
792 return determine_base_object (TREE_OPERAND (base, 0));
794 return fold_convert (ptr_type_node,
795 build_fold_addr_expr (base));
797 case PLUS_EXPR:
798 case MINUS_EXPR:
799 op0 = determine_base_object (TREE_OPERAND (expr, 0));
800 op1 = determine_base_object (TREE_OPERAND (expr, 1));
802 if (!op1)
803 return op0;
805 if (!op0)
806 return (code == PLUS_EXPR
807 ? op1
808 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
810 return fold_build2 (code, ptr_type_node, op0, op1);
812 case NOP_EXPR:
813 case CONVERT_EXPR:
814 return determine_base_object (TREE_OPERAND (expr, 0));
816 default:
817 return fold_convert (ptr_type_node, expr);
821 /* Allocates an induction variable with given initial value BASE and step STEP
822 for loop LOOP. */
824 static struct iv *
825 alloc_iv (tree base, tree step)
827 struct iv *iv = xcalloc (1, sizeof (struct iv));
829 if (step && integer_zerop (step))
830 step = NULL_TREE;
832 iv->base = base;
833 iv->base_object = determine_base_object (base);
834 iv->step = step;
835 iv->biv_p = false;
836 iv->have_use_for = false;
837 iv->use_id = 0;
838 iv->ssa_name = NULL_TREE;
840 return iv;
843 /* Sets STEP and BASE for induction variable IV. */
845 static void
846 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
848 struct version_info *info = name_info (data, iv);
850 gcc_assert (!info->iv);
852 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
853 info->iv = alloc_iv (base, step);
854 info->iv->ssa_name = iv;
857 /* Finds induction variable declaration for VAR. */
859 static struct iv *
860 get_iv (struct ivopts_data *data, tree var)
862 basic_block bb;
864 if (!name_info (data, var)->iv)
866 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
868 if (!bb
869 || !flow_bb_inside_loop_p (data->current_loop, bb))
870 set_iv (data, var, var, NULL_TREE);
873 return name_info (data, var)->iv;
876 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
877 not define a simple affine biv with nonzero step. */
879 static tree
880 determine_biv_step (tree phi)
882 struct loop *loop = bb_for_stmt (phi)->loop_father;
883 tree name = PHI_RESULT (phi), base, step;
885 if (!is_gimple_reg (name))
886 return NULL_TREE;
888 if (!simple_iv (loop, phi, name, &base, &step, true))
889 return NULL_TREE;
891 if (zero_p (step))
892 return NULL_TREE;
894 return step;
897 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
899 static bool
900 abnormal_ssa_name_p (tree exp)
902 if (!exp)
903 return false;
905 if (TREE_CODE (exp) != SSA_NAME)
906 return false;
908 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
911 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
912 abnormal phi node. Callback for for_each_index. */
914 static bool
915 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
916 void *data ATTRIBUTE_UNUSED)
918 if (TREE_CODE (base) == ARRAY_REF)
920 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
921 return false;
922 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
923 return false;
926 return !abnormal_ssa_name_p (*index);
929 /* Returns true if EXPR contains a ssa name that occurs in an
930 abnormal phi node. */
932 static bool
933 contains_abnormal_ssa_name_p (tree expr)
935 enum tree_code code;
936 enum tree_code_class class;
938 if (!expr)
939 return false;
941 code = TREE_CODE (expr);
942 class = TREE_CODE_CLASS (code);
944 if (code == SSA_NAME)
945 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
947 if (code == INTEGER_CST
948 || is_gimple_min_invariant (expr))
949 return false;
951 if (code == ADDR_EXPR)
952 return !for_each_index (&TREE_OPERAND (expr, 0),
953 idx_contains_abnormal_ssa_name_p,
954 NULL);
956 switch (class)
958 case tcc_binary:
959 case tcc_comparison:
960 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
961 return true;
963 /* Fallthru. */
964 case tcc_unary:
965 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
966 return true;
968 break;
970 default:
971 gcc_unreachable ();
974 return false;
977 /* Finds basic ivs. */
979 static bool
980 find_bivs (struct ivopts_data *data)
982 tree phi, step, type, base;
983 bool found = false;
984 struct loop *loop = data->current_loop;
986 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
988 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
989 continue;
991 step = determine_biv_step (phi);
992 if (!step)
993 continue;
995 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
996 base = expand_simple_operations (base);
997 if (contains_abnormal_ssa_name_p (base)
998 || contains_abnormal_ssa_name_p (step))
999 continue;
1001 type = TREE_TYPE (PHI_RESULT (phi));
1002 base = fold_convert (type, base);
1003 if (step)
1004 step = fold_convert (type, step);
1006 set_iv (data, PHI_RESULT (phi), base, step);
1007 found = true;
1010 return found;
1013 /* Marks basic ivs. */
1015 static void
1016 mark_bivs (struct ivopts_data *data)
1018 tree phi, var;
1019 struct iv *iv, *incr_iv;
1020 struct loop *loop = data->current_loop;
1021 basic_block incr_bb;
1023 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1025 iv = get_iv (data, PHI_RESULT (phi));
1026 if (!iv)
1027 continue;
1029 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1030 incr_iv = get_iv (data, var);
1031 if (!incr_iv)
1032 continue;
1034 /* If the increment is in the subloop, ignore it. */
1035 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1036 if (incr_bb->loop_father != data->current_loop
1037 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1038 continue;
1040 iv->biv_p = true;
1041 incr_iv->biv_p = true;
1045 /* Checks whether STMT defines a linear induction variable and stores its
1046 parameters to BASE and STEP. */
1048 static bool
1049 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1050 tree *base, tree *step)
1052 tree lhs;
1053 struct loop *loop = data->current_loop;
1055 *base = NULL_TREE;
1056 *step = NULL_TREE;
1058 if (TREE_CODE (stmt) != MODIFY_EXPR)
1059 return false;
1061 lhs = TREE_OPERAND (stmt, 0);
1062 if (TREE_CODE (lhs) != SSA_NAME)
1063 return false;
1065 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step, true))
1066 return false;
1067 *base = expand_simple_operations (*base);
1069 if (contains_abnormal_ssa_name_p (*base)
1070 || contains_abnormal_ssa_name_p (*step))
1071 return false;
1073 return true;
1076 /* Finds general ivs in statement STMT. */
1078 static void
1079 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1081 tree base, step;
1083 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1084 return;
1086 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1089 /* Finds general ivs in basic block BB. */
1091 static void
1092 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1094 block_stmt_iterator bsi;
1096 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1097 find_givs_in_stmt (data, bsi_stmt (bsi));
1100 /* Finds general ivs. */
1102 static void
1103 find_givs (struct ivopts_data *data)
1105 struct loop *loop = data->current_loop;
1106 basic_block *body = get_loop_body_in_dom_order (loop);
1107 unsigned i;
1109 for (i = 0; i < loop->num_nodes; i++)
1110 find_givs_in_bb (data, body[i]);
1111 free (body);
1114 /* For each ssa name defined in LOOP determines whether it is an induction
1115 variable and if so, its initial value and step. */
1117 static bool
1118 find_induction_variables (struct ivopts_data *data)
1120 unsigned i;
1121 bitmap_iterator bi;
1123 if (!find_bivs (data))
1124 return false;
1126 find_givs (data);
1127 mark_bivs (data);
1129 if (dump_file && (dump_flags & TDF_DETAILS))
1131 struct tree_niter_desc *niter;
1133 niter = niter_for_single_dom_exit (data);
1135 if (niter)
1137 fprintf (dump_file, " number of iterations ");
1138 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1139 fprintf (dump_file, "\n");
1141 fprintf (dump_file, " may be zero if ");
1142 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1143 fprintf (dump_file, "\n");
1144 fprintf (dump_file, "\n");
1147 fprintf (dump_file, "Induction variables:\n\n");
1149 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1151 if (ver_info (data, i)->iv)
1152 dump_iv (dump_file, ver_info (data, i)->iv);
1156 return true;
1159 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1161 static struct iv_use *
1162 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1163 tree stmt, enum use_type use_type)
1165 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1167 use->id = n_iv_uses (data);
1168 use->type = use_type;
1169 use->iv = iv;
1170 use->stmt = stmt;
1171 use->op_p = use_p;
1172 use->related_cands = BITMAP_ALLOC (NULL);
1174 /* To avoid showing ssa name in the dumps, if it was not reset by the
1175 caller. */
1176 iv->ssa_name = NULL_TREE;
1178 if (dump_file && (dump_flags & TDF_DETAILS))
1179 dump_use (dump_file, use);
1181 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1183 return use;
1186 /* Checks whether OP is a loop-level invariant and if so, records it.
1187 NONLINEAR_USE is true if the invariant is used in a way we do not
1188 handle specially. */
1190 static void
1191 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1193 basic_block bb;
1194 struct version_info *info;
1196 if (TREE_CODE (op) != SSA_NAME
1197 || !is_gimple_reg (op))
1198 return;
1200 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1201 if (bb
1202 && flow_bb_inside_loop_p (data->current_loop, bb))
1203 return;
1205 info = name_info (data, op);
1206 info->name = op;
1207 info->has_nonlin_use |= nonlinear_use;
1208 if (!info->inv_id)
1209 info->inv_id = ++data->max_inv_id;
1210 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1213 /* Checks whether the use OP is interesting and if so, records it
1214 as TYPE. */
1216 static struct iv_use *
1217 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1218 enum use_type type)
1220 struct iv *iv;
1221 struct iv *civ;
1222 tree stmt;
1223 struct iv_use *use;
1225 if (TREE_CODE (op) != SSA_NAME)
1226 return NULL;
1228 iv = get_iv (data, op);
1229 if (!iv)
1230 return NULL;
1232 if (iv->have_use_for)
1234 use = iv_use (data, iv->use_id);
1236 gcc_assert (use->type == USE_NONLINEAR_EXPR
1237 || use->type == USE_OUTER);
1239 if (type == USE_NONLINEAR_EXPR)
1240 use->type = USE_NONLINEAR_EXPR;
1241 return use;
1244 if (zero_p (iv->step))
1246 record_invariant (data, op, true);
1247 return NULL;
1249 iv->have_use_for = true;
1251 civ = xmalloc (sizeof (struct iv));
1252 *civ = *iv;
1254 stmt = SSA_NAME_DEF_STMT (op);
1255 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1256 || TREE_CODE (stmt) == MODIFY_EXPR);
1258 use = record_use (data, NULL, civ, stmt, type);
1259 iv->use_id = use->id;
1261 return use;
1264 /* Checks whether the use OP is interesting and if so, records it. */
1266 static struct iv_use *
1267 find_interesting_uses_op (struct ivopts_data *data, tree op)
1269 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1272 /* Records a definition of induction variable OP that is used outside of the
1273 loop. */
1275 static struct iv_use *
1276 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1278 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1281 /* Checks whether the condition *COND_P in STMT is interesting
1282 and if so, records it. */
1284 static void
1285 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1287 tree *op0_p;
1288 tree *op1_p;
1289 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1290 struct iv const_iv;
1291 tree zero = integer_zero_node;
1293 const_iv.step = NULL_TREE;
1295 if (TREE_CODE (*cond_p) != SSA_NAME
1296 && !COMPARISON_CLASS_P (*cond_p))
1297 return;
1299 if (TREE_CODE (*cond_p) == SSA_NAME)
1301 op0_p = cond_p;
1302 op1_p = &zero;
1304 else
1306 op0_p = &TREE_OPERAND (*cond_p, 0);
1307 op1_p = &TREE_OPERAND (*cond_p, 1);
1310 if (TREE_CODE (*op0_p) == SSA_NAME)
1311 iv0 = get_iv (data, *op0_p);
1312 else
1313 iv0 = &const_iv;
1315 if (TREE_CODE (*op1_p) == SSA_NAME)
1316 iv1 = get_iv (data, *op1_p);
1317 else
1318 iv1 = &const_iv;
1320 if (/* When comparing with non-invariant value, we may not do any senseful
1321 induction variable elimination. */
1322 (!iv0 || !iv1)
1323 /* Eliminating condition based on two ivs would be nontrivial.
1324 ??? TODO -- it is not really important to handle this case. */
1325 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1327 find_interesting_uses_op (data, *op0_p);
1328 find_interesting_uses_op (data, *op1_p);
1329 return;
1332 if (zero_p (iv0->step) && zero_p (iv1->step))
1334 /* If both are invariants, this is a work for unswitching. */
1335 return;
1338 civ = xmalloc (sizeof (struct iv));
1339 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1340 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1343 /* Returns true if expression EXPR is obviously invariant in LOOP,
1344 i.e. if all its operands are defined outside of the LOOP. */
1346 bool
1347 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1349 basic_block def_bb;
1350 unsigned i, len;
1352 if (is_gimple_min_invariant (expr))
1353 return true;
1355 if (TREE_CODE (expr) == SSA_NAME)
1357 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1358 if (def_bb
1359 && flow_bb_inside_loop_p (loop, def_bb))
1360 return false;
1362 return true;
1365 if (!EXPR_P (expr))
1366 return false;
1368 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1369 for (i = 0; i < len; i++)
1370 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1371 return false;
1373 return true;
1376 /* Cumulates the steps of indices into DATA and replaces their values with the
1377 initial ones. Returns false when the value of the index cannot be determined.
1378 Callback for for_each_index. */
1380 struct ifs_ivopts_data
1382 struct ivopts_data *ivopts_data;
1383 tree stmt;
1384 tree *step_p;
1387 static bool
1388 idx_find_step (tree base, tree *idx, void *data)
1390 struct ifs_ivopts_data *dta = data;
1391 struct iv *iv;
1392 tree step, iv_step, lbound, off;
1393 struct loop *loop = dta->ivopts_data->current_loop;
1395 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1396 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1397 return false;
1399 /* If base is a component ref, require that the offset of the reference
1400 be invariant. */
1401 if (TREE_CODE (base) == COMPONENT_REF)
1403 off = component_ref_field_offset (base);
1404 return expr_invariant_in_loop_p (loop, off);
1407 /* If base is array, first check whether we will be able to move the
1408 reference out of the loop (in order to take its address in strength
1409 reduction). In order for this to work we need both lower bound
1410 and step to be loop invariants. */
1411 if (TREE_CODE (base) == ARRAY_REF)
1413 step = array_ref_element_size (base);
1414 lbound = array_ref_low_bound (base);
1416 if (!expr_invariant_in_loop_p (loop, step)
1417 || !expr_invariant_in_loop_p (loop, lbound))
1418 return false;
1421 if (TREE_CODE (*idx) != SSA_NAME)
1422 return true;
1424 iv = get_iv (dta->ivopts_data, *idx);
1425 if (!iv)
1426 return false;
1428 *idx = iv->base;
1430 if (!iv->step)
1431 return true;
1433 if (TREE_CODE (base) == ARRAY_REF)
1435 step = array_ref_element_size (base);
1437 /* We only handle addresses whose step is an integer constant. */
1438 if (TREE_CODE (step) != INTEGER_CST)
1439 return false;
1441 else
1442 /* The step for pointer arithmetics already is 1 byte. */
1443 step = build_int_cst (sizetype, 1);
1445 if (TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
1446 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1447 sizetype, iv->base, iv->step, dta->stmt);
1448 else
1449 iv_step = fold_convert (sizetype, iv->step);
1451 if (!iv_step)
1453 /* The index might wrap. */
1454 return false;
1457 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1459 if (!*dta->step_p)
1460 *dta->step_p = step;
1461 else
1462 *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1464 return true;
1467 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1468 object is passed to it in DATA. */
1470 static bool
1471 idx_record_use (tree base, tree *idx,
1472 void *data)
1474 find_interesting_uses_op (data, *idx);
1475 if (TREE_CODE (base) == ARRAY_REF)
1477 find_interesting_uses_op (data, array_ref_element_size (base));
1478 find_interesting_uses_op (data, array_ref_low_bound (base));
1480 return true;
1483 /* Returns true if memory reference REF may be unaligned. */
1485 static bool
1486 may_be_unaligned_p (tree ref)
1488 tree base;
1489 tree base_type;
1490 HOST_WIDE_INT bitsize;
1491 HOST_WIDE_INT bitpos;
1492 tree toffset;
1493 enum machine_mode mode;
1494 int unsignedp, volatilep;
1495 unsigned base_align;
1497 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1498 thus they are not missaligned. */
1499 if (TREE_CODE (ref) == TARGET_MEM_REF)
1500 return false;
1502 /* The test below is basically copy of what expr.c:normal_inner_ref
1503 does to check whether the object must be loaded by parts when
1504 STRICT_ALIGNMENT is true. */
1505 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1506 &unsignedp, &volatilep, true);
1507 base_type = TREE_TYPE (base);
1508 base_align = TYPE_ALIGN (base_type);
1510 if (mode != BLKmode
1511 && (base_align < GET_MODE_ALIGNMENT (mode)
1512 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1513 || bitpos % BITS_PER_UNIT != 0))
1514 return true;
1516 return false;
1519 /* Finds addresses in *OP_P inside STMT. */
1521 static void
1522 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1524 tree base = *op_p, step = NULL;
1525 struct iv *civ;
1526 struct ifs_ivopts_data ifs_ivopts_data;
1528 /* Do not play with volatile memory references. A bit too conservative,
1529 perhaps, but safe. */
1530 if (stmt_ann (stmt)->has_volatile_ops)
1531 goto fail;
1533 /* Ignore bitfields for now. Not really something terribly complicated
1534 to handle. TODO. */
1535 if (TREE_CODE (base) == COMPONENT_REF
1536 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1537 goto fail;
1539 if (STRICT_ALIGNMENT
1540 && may_be_unaligned_p (base))
1541 goto fail;
1543 base = unshare_expr (base);
1545 if (TREE_CODE (base) == TARGET_MEM_REF)
1547 tree type = build_pointer_type (TREE_TYPE (base));
1548 tree astep;
1550 if (TMR_BASE (base)
1551 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1553 civ = get_iv (data, TMR_BASE (base));
1554 if (!civ)
1555 goto fail;
1557 TMR_BASE (base) = civ->base;
1558 step = civ->step;
1560 if (TMR_INDEX (base)
1561 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1563 civ = get_iv (data, TMR_INDEX (base));
1564 if (!civ)
1565 goto fail;
1567 TMR_INDEX (base) = civ->base;
1568 astep = civ->step;
1570 if (astep)
1572 if (TMR_STEP (base))
1573 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1575 if (step)
1576 step = fold_build2 (PLUS_EXPR, type, step, astep);
1577 else
1578 step = astep;
1582 if (zero_p (step))
1583 goto fail;
1584 base = tree_mem_ref_addr (type, base);
1586 else
1588 ifs_ivopts_data.ivopts_data = data;
1589 ifs_ivopts_data.stmt = stmt;
1590 ifs_ivopts_data.step_p = &step;
1591 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1592 || zero_p (step))
1593 goto fail;
1595 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1596 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1598 base = build_fold_addr_expr (base);
1601 civ = alloc_iv (base, step);
1602 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1603 return;
1605 fail:
1606 for_each_index (op_p, idx_record_use, data);
1609 /* Finds and records invariants used in STMT. */
1611 static void
1612 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1614 ssa_op_iter iter;
1615 use_operand_p use_p;
1616 tree op;
1618 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1620 op = USE_FROM_PTR (use_p);
1621 record_invariant (data, op, false);
1625 /* Finds interesting uses of induction variables in the statement STMT. */
1627 static void
1628 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1630 struct iv *iv;
1631 tree op, lhs, rhs;
1632 ssa_op_iter iter;
1633 use_operand_p use_p;
1635 find_invariants_stmt (data, stmt);
1637 if (TREE_CODE (stmt) == COND_EXPR)
1639 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1640 return;
1643 if (TREE_CODE (stmt) == MODIFY_EXPR)
1645 lhs = TREE_OPERAND (stmt, 0);
1646 rhs = TREE_OPERAND (stmt, 1);
1648 if (TREE_CODE (lhs) == SSA_NAME)
1650 /* If the statement defines an induction variable, the uses are not
1651 interesting by themselves. */
1653 iv = get_iv (data, lhs);
1655 if (iv && !zero_p (iv->step))
1656 return;
1659 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1661 case tcc_comparison:
1662 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1663 return;
1665 case tcc_reference:
1666 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1667 if (REFERENCE_CLASS_P (lhs))
1668 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1669 return;
1671 default: ;
1674 if (REFERENCE_CLASS_P (lhs)
1675 && is_gimple_val (rhs))
1677 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1678 find_interesting_uses_op (data, rhs);
1679 return;
1682 /* TODO -- we should also handle address uses of type
1684 memory = call (whatever);
1688 call (memory). */
1691 if (TREE_CODE (stmt) == PHI_NODE
1692 && bb_for_stmt (stmt) == data->current_loop->header)
1694 lhs = PHI_RESULT (stmt);
1695 iv = get_iv (data, lhs);
1697 if (iv && !zero_p (iv->step))
1698 return;
1701 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1703 op = USE_FROM_PTR (use_p);
1705 if (TREE_CODE (op) != SSA_NAME)
1706 continue;
1708 iv = get_iv (data, op);
1709 if (!iv)
1710 continue;
1712 find_interesting_uses_op (data, op);
1716 /* Finds interesting uses of induction variables outside of loops
1717 on loop exit edge EXIT. */
1719 static void
1720 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1722 tree phi, def;
1724 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1726 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1727 find_interesting_uses_outer (data, def);
1731 /* Finds uses of the induction variables that are interesting. */
1733 static void
1734 find_interesting_uses (struct ivopts_data *data)
1736 basic_block bb;
1737 block_stmt_iterator bsi;
1738 tree phi;
1739 basic_block *body = get_loop_body (data->current_loop);
1740 unsigned i;
1741 struct version_info *info;
1742 edge e;
1744 if (dump_file && (dump_flags & TDF_DETAILS))
1745 fprintf (dump_file, "Uses:\n\n");
1747 for (i = 0; i < data->current_loop->num_nodes; i++)
1749 edge_iterator ei;
1750 bb = body[i];
1752 FOR_EACH_EDGE (e, ei, bb->succs)
1753 if (e->dest != EXIT_BLOCK_PTR
1754 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1755 find_interesting_uses_outside (data, e);
1757 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1758 find_interesting_uses_stmt (data, phi);
1759 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1760 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1763 if (dump_file && (dump_flags & TDF_DETAILS))
1765 bitmap_iterator bi;
1767 fprintf (dump_file, "\n");
1769 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1771 info = ver_info (data, i);
1772 if (info->inv_id)
1774 fprintf (dump_file, " ");
1775 print_generic_expr (dump_file, info->name, TDF_SLIM);
1776 fprintf (dump_file, " is invariant (%d)%s\n",
1777 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1781 fprintf (dump_file, "\n");
1784 free (body);
1787 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1788 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1789 we are at the top-level of the processed address. */
1791 static tree
1792 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1793 unsigned HOST_WIDE_INT *offset)
1795 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1796 enum tree_code code;
1797 tree type, orig_type = TREE_TYPE (expr);
1798 unsigned HOST_WIDE_INT off0, off1, st;
1799 tree orig_expr = expr;
1801 STRIP_NOPS (expr);
1803 type = TREE_TYPE (expr);
1804 code = TREE_CODE (expr);
1805 *offset = 0;
1807 switch (code)
1809 case INTEGER_CST:
1810 if (!cst_and_fits_in_hwi (expr)
1811 || zero_p (expr))
1812 return orig_expr;
1814 *offset = int_cst_value (expr);
1815 return build_int_cst_type (orig_type, 0);
1817 case PLUS_EXPR:
1818 case MINUS_EXPR:
1819 op0 = TREE_OPERAND (expr, 0);
1820 op1 = TREE_OPERAND (expr, 1);
1822 op0 = strip_offset_1 (op0, false, false, &off0);
1823 op1 = strip_offset_1 (op1, false, false, &off1);
1825 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1826 if (op0 == TREE_OPERAND (expr, 0)
1827 && op1 == TREE_OPERAND (expr, 1))
1828 return orig_expr;
1830 if (zero_p (op1))
1831 expr = op0;
1832 else if (zero_p (op0))
1834 if (code == PLUS_EXPR)
1835 expr = op1;
1836 else
1837 expr = fold_build1 (NEGATE_EXPR, type, op1);
1839 else
1840 expr = fold_build2 (code, type, op0, op1);
1842 return fold_convert (orig_type, expr);
1844 case ARRAY_REF:
1845 if (!inside_addr)
1846 return orig_expr;
1848 step = array_ref_element_size (expr);
1849 if (!cst_and_fits_in_hwi (step))
1850 break;
1852 st = int_cst_value (step);
1853 op1 = TREE_OPERAND (expr, 1);
1854 op1 = strip_offset_1 (op1, false, false, &off1);
1855 *offset = off1 * st;
1857 if (top_compref
1858 && zero_p (op1))
1860 /* Strip the component reference completely. */
1861 op0 = TREE_OPERAND (expr, 0);
1862 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1863 *offset += off0;
1864 return op0;
1866 break;
1868 case COMPONENT_REF:
1869 if (!inside_addr)
1870 return orig_expr;
1872 tmp = component_ref_field_offset (expr);
1873 if (top_compref
1874 && cst_and_fits_in_hwi (tmp))
1876 /* Strip the component reference completely. */
1877 op0 = TREE_OPERAND (expr, 0);
1878 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1879 *offset = off0 + int_cst_value (tmp);
1880 return op0;
1882 break;
1884 case ADDR_EXPR:
1885 op0 = TREE_OPERAND (expr, 0);
1886 op0 = strip_offset_1 (op0, true, true, &off0);
1887 *offset += off0;
1889 if (op0 == TREE_OPERAND (expr, 0))
1890 return orig_expr;
1892 expr = build_fold_addr_expr (op0);
1893 return fold_convert (orig_type, expr);
1895 case INDIRECT_REF:
1896 inside_addr = false;
1897 break;
1899 default:
1900 return orig_expr;
1903 /* Default handling of expressions for that we want to recurse into
1904 the first operand. */
1905 op0 = TREE_OPERAND (expr, 0);
1906 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1907 *offset += off0;
1909 if (op0 == TREE_OPERAND (expr, 0)
1910 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1911 return orig_expr;
1913 expr = copy_node (expr);
1914 TREE_OPERAND (expr, 0) = op0;
1915 if (op1)
1916 TREE_OPERAND (expr, 1) = op1;
1918 /* Inside address, we might strip the top level component references,
1919 thus changing type of the expression. Handling of ADDR_EXPR
1920 will fix that. */
1921 expr = fold_convert (orig_type, expr);
1923 return expr;
1926 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1928 static tree
1929 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1931 return strip_offset_1 (expr, false, false, offset);
1934 /* Returns variant of TYPE that can be used as base for different uses.
1935 For integer types, we return unsigned variant of the type, which
1936 avoids problems with overflows. For pointer types, we return void *. */
1938 static tree
1939 generic_type_for (tree type)
1941 if (POINTER_TYPE_P (type))
1942 return ptr_type_node;
1944 if (TYPE_UNSIGNED (type))
1945 return type;
1947 return unsigned_type_for (type);
1950 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1951 the bitmap to that we should store it. */
1953 static struct ivopts_data *fd_ivopts_data;
1954 static tree
1955 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1957 bitmap *depends_on = data;
1958 struct version_info *info;
1960 if (TREE_CODE (*expr_p) != SSA_NAME)
1961 return NULL_TREE;
1962 info = name_info (fd_ivopts_data, *expr_p);
1964 if (!info->inv_id || info->has_nonlin_use)
1965 return NULL_TREE;
1967 if (!*depends_on)
1968 *depends_on = BITMAP_ALLOC (NULL);
1969 bitmap_set_bit (*depends_on, info->inv_id);
1971 return NULL_TREE;
1974 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1975 position to POS. If USE is not NULL, the candidate is set as related to
1976 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1977 replacement of the final value of the iv by a direct computation. */
1979 static struct iv_cand *
1980 add_candidate_1 (struct ivopts_data *data,
1981 tree base, tree step, bool important, enum iv_position pos,
1982 struct iv_use *use, tree incremented_at)
1984 unsigned i;
1985 struct iv_cand *cand = NULL;
1986 tree type, orig_type;
1988 if (base)
1990 orig_type = TREE_TYPE (base);
1991 type = generic_type_for (orig_type);
1992 if (type != orig_type)
1994 base = fold_convert (type, base);
1995 if (step)
1996 step = fold_convert (type, step);
2000 for (i = 0; i < n_iv_cands (data); i++)
2002 cand = iv_cand (data, i);
2004 if (cand->pos != pos)
2005 continue;
2007 if (cand->incremented_at != incremented_at)
2008 continue;
2010 if (!cand->iv)
2012 if (!base && !step)
2013 break;
2015 continue;
2018 if (!base && !step)
2019 continue;
2021 if (!operand_equal_p (base, cand->iv->base, 0))
2022 continue;
2024 if (zero_p (cand->iv->step))
2026 if (zero_p (step))
2027 break;
2029 else
2031 if (step && operand_equal_p (step, cand->iv->step, 0))
2032 break;
2036 if (i == n_iv_cands (data))
2038 cand = xcalloc (1, sizeof (struct iv_cand));
2039 cand->id = i;
2041 if (!base && !step)
2042 cand->iv = NULL;
2043 else
2044 cand->iv = alloc_iv (base, step);
2046 cand->pos = pos;
2047 if (pos != IP_ORIGINAL && cand->iv)
2049 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2050 cand->var_after = cand->var_before;
2052 cand->important = important;
2053 cand->incremented_at = incremented_at;
2054 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2056 if (step
2057 && TREE_CODE (step) != INTEGER_CST)
2059 fd_ivopts_data = data;
2060 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2063 if (dump_file && (dump_flags & TDF_DETAILS))
2064 dump_cand (dump_file, cand);
2067 if (important && !cand->important)
2069 cand->important = true;
2070 if (dump_file && (dump_flags & TDF_DETAILS))
2071 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2074 if (use)
2076 bitmap_set_bit (use->related_cands, i);
2077 if (dump_file && (dump_flags & TDF_DETAILS))
2078 fprintf (dump_file, "Candidate %d is related to use %d\n",
2079 cand->id, use->id);
2082 return cand;
2085 /* Returns true if incrementing the induction variable at the end of the LOOP
2086 is allowed.
2088 The purpose is to avoid splitting latch edge with a biv increment, thus
2089 creating a jump, possibly confusing other optimization passes and leaving
2090 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2091 is not available (so we do not have a better alternative), or if the latch
2092 edge is already nonempty. */
2094 static bool
2095 allow_ip_end_pos_p (struct loop *loop)
2097 if (!ip_normal_pos (loop))
2098 return true;
2100 if (!empty_block_p (ip_end_pos (loop)))
2101 return true;
2103 return false;
2106 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2107 position to POS. If USE is not NULL, the candidate is set as related to
2108 it. The candidate computation is scheduled on all available positions. */
2110 static void
2111 add_candidate (struct ivopts_data *data,
2112 tree base, tree step, bool important, struct iv_use *use)
2114 if (ip_normal_pos (data->current_loop))
2115 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2116 if (ip_end_pos (data->current_loop)
2117 && allow_ip_end_pos_p (data->current_loop))
2118 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2121 /* Add a standard "0 + 1 * iteration" iv candidate for a
2122 type with SIZE bits. */
2124 static void
2125 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2126 unsigned int size)
2128 tree type = lang_hooks.types.type_for_size (size, true);
2129 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2130 true, NULL);
2133 /* Adds standard iv candidates. */
2135 static void
2136 add_standard_iv_candidates (struct ivopts_data *data)
2138 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2140 /* The same for a double-integer type if it is still fast enough. */
2141 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2142 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2146 /* Adds candidates bases on the old induction variable IV. */
2148 static void
2149 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2151 tree phi, def;
2152 struct iv_cand *cand;
2154 add_candidate (data, iv->base, iv->step, true, NULL);
2156 /* The same, but with initial value zero. */
2157 add_candidate (data,
2158 build_int_cst (TREE_TYPE (iv->base), 0),
2159 iv->step, true, NULL);
2161 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2162 if (TREE_CODE (phi) == PHI_NODE)
2164 /* Additionally record the possibility of leaving the original iv
2165 untouched. */
2166 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2167 cand = add_candidate_1 (data,
2168 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2169 SSA_NAME_DEF_STMT (def));
2170 cand->var_before = iv->ssa_name;
2171 cand->var_after = def;
2175 /* Adds candidates based on the old induction variables. */
2177 static void
2178 add_old_ivs_candidates (struct ivopts_data *data)
2180 unsigned i;
2181 struct iv *iv;
2182 bitmap_iterator bi;
2184 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2186 iv = ver_info (data, i)->iv;
2187 if (iv && iv->biv_p && !zero_p (iv->step))
2188 add_old_iv_candidates (data, iv);
2192 /* Adds candidates based on the value of the induction variable IV and USE. */
2194 static void
2195 add_iv_value_candidates (struct ivopts_data *data,
2196 struct iv *iv, struct iv_use *use)
2198 unsigned HOST_WIDE_INT offset;
2199 tree base;
2201 add_candidate (data, iv->base, iv->step, false, use);
2203 /* The same, but with initial value zero. Make such variable important,
2204 since it is generic enough so that possibly many uses may be based
2205 on it. */
2206 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2207 iv->step, true, use);
2209 /* Third, try removing the constant offset. */
2210 base = strip_offset (iv->base, &offset);
2211 if (offset)
2212 add_candidate (data, base, iv->step, false, use);
2215 /* Possibly adds pseudocandidate for replacing the final value of USE by
2216 a direct computation. */
2218 static void
2219 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2221 struct tree_niter_desc *niter;
2223 /* We must know where we exit the loop and how many times does it roll. */
2224 niter = niter_for_single_dom_exit (data);
2225 if (!niter
2226 || !zero_p (niter->may_be_zero))
2227 return;
2229 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2232 /* Adds candidates based on the uses. */
2234 static void
2235 add_derived_ivs_candidates (struct ivopts_data *data)
2237 unsigned i;
2239 for (i = 0; i < n_iv_uses (data); i++)
2241 struct iv_use *use = iv_use (data, i);
2243 if (!use)
2244 continue;
2246 switch (use->type)
2248 case USE_NONLINEAR_EXPR:
2249 case USE_COMPARE:
2250 case USE_ADDRESS:
2251 /* Just add the ivs based on the value of the iv used here. */
2252 add_iv_value_candidates (data, use->iv, use);
2253 break;
2255 case USE_OUTER:
2256 add_iv_value_candidates (data, use->iv, use);
2258 /* Additionally, add the pseudocandidate for the possibility to
2259 replace the final value by a direct computation. */
2260 add_iv_outer_candidates (data, use);
2261 break;
2263 default:
2264 gcc_unreachable ();
2269 /* Record important candidates and add them to related_cands bitmaps
2270 if needed. */
2272 static void
2273 record_important_candidates (struct ivopts_data *data)
2275 unsigned i;
2276 struct iv_use *use;
2278 for (i = 0; i < n_iv_cands (data); i++)
2280 struct iv_cand *cand = iv_cand (data, i);
2282 if (cand->important)
2283 bitmap_set_bit (data->important_candidates, i);
2286 data->consider_all_candidates = (n_iv_cands (data)
2287 <= CONSIDER_ALL_CANDIDATES_BOUND);
2289 if (data->consider_all_candidates)
2291 /* We will not need "related_cands" bitmaps in this case,
2292 so release them to decrease peak memory consumption. */
2293 for (i = 0; i < n_iv_uses (data); i++)
2295 use = iv_use (data, i);
2296 BITMAP_FREE (use->related_cands);
2299 else
2301 /* Add important candidates to the related_cands bitmaps. */
2302 for (i = 0; i < n_iv_uses (data); i++)
2303 bitmap_ior_into (iv_use (data, i)->related_cands,
2304 data->important_candidates);
2308 /* Finds the candidates for the induction variables. */
2310 static void
2311 find_iv_candidates (struct ivopts_data *data)
2313 /* Add commonly used ivs. */
2314 add_standard_iv_candidates (data);
2316 /* Add old induction variables. */
2317 add_old_ivs_candidates (data);
2319 /* Add induction variables derived from uses. */
2320 add_derived_ivs_candidates (data);
2322 /* Record the important candidates. */
2323 record_important_candidates (data);
2326 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2327 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2328 we allocate a simple list to every use. */
2330 static void
2331 alloc_use_cost_map (struct ivopts_data *data)
2333 unsigned i, size, s, j;
2335 for (i = 0; i < n_iv_uses (data); i++)
2337 struct iv_use *use = iv_use (data, i);
2338 bitmap_iterator bi;
2340 if (data->consider_all_candidates)
2341 size = n_iv_cands (data);
2342 else
2344 s = 0;
2345 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2347 s++;
2350 /* Round up to the power of two, so that moduling by it is fast. */
2351 for (size = 1; size < s; size <<= 1)
2352 continue;
2355 use->n_map_members = size;
2356 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2360 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2361 on invariants DEPENDS_ON and that the value used in expressing it
2362 is VALUE.*/
2364 static void
2365 set_use_iv_cost (struct ivopts_data *data,
2366 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2367 bitmap depends_on, tree value)
2369 unsigned i, s;
2371 if (cost == INFTY)
2373 BITMAP_FREE (depends_on);
2374 return;
2377 if (data->consider_all_candidates)
2379 use->cost_map[cand->id].cand = cand;
2380 use->cost_map[cand->id].cost = cost;
2381 use->cost_map[cand->id].depends_on = depends_on;
2382 use->cost_map[cand->id].value = value;
2383 return;
2386 /* n_map_members is a power of two, so this computes modulo. */
2387 s = cand->id & (use->n_map_members - 1);
2388 for (i = s; i < use->n_map_members; i++)
2389 if (!use->cost_map[i].cand)
2390 goto found;
2391 for (i = 0; i < s; i++)
2392 if (!use->cost_map[i].cand)
2393 goto found;
2395 gcc_unreachable ();
2397 found:
2398 use->cost_map[i].cand = cand;
2399 use->cost_map[i].cost = cost;
2400 use->cost_map[i].depends_on = depends_on;
2401 use->cost_map[i].value = value;
2404 /* Gets cost of (USE, CANDIDATE) pair. */
2406 static struct cost_pair *
2407 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2408 struct iv_cand *cand)
2410 unsigned i, s;
2411 struct cost_pair *ret;
2413 if (!cand)
2414 return NULL;
2416 if (data->consider_all_candidates)
2418 ret = use->cost_map + cand->id;
2419 if (!ret->cand)
2420 return NULL;
2422 return ret;
2425 /* n_map_members is a power of two, so this computes modulo. */
2426 s = cand->id & (use->n_map_members - 1);
2427 for (i = s; i < use->n_map_members; i++)
2428 if (use->cost_map[i].cand == cand)
2429 return use->cost_map + i;
2431 for (i = 0; i < s; i++)
2432 if (use->cost_map[i].cand == cand)
2433 return use->cost_map + i;
2435 return NULL;
2438 /* Returns estimate on cost of computing SEQ. */
2440 static unsigned
2441 seq_cost (rtx seq)
2443 unsigned cost = 0;
2444 rtx set;
2446 for (; seq; seq = NEXT_INSN (seq))
2448 set = single_set (seq);
2449 if (set)
2450 cost += rtx_cost (set, SET);
2451 else
2452 cost++;
2455 return cost;
2458 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2459 static rtx
2460 produce_memory_decl_rtl (tree obj, int *regno)
2462 rtx x;
2464 gcc_assert (obj);
2465 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2467 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2468 x = gen_rtx_SYMBOL_REF (Pmode, name);
2470 else
2471 x = gen_raw_REG (Pmode, (*regno)++);
2473 return gen_rtx_MEM (DECL_MODE (obj), x);
2476 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2477 walk_tree. DATA contains the actual fake register number. */
2479 static tree
2480 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2482 tree obj = NULL_TREE;
2483 rtx x = NULL_RTX;
2484 int *regno = data;
2486 switch (TREE_CODE (*expr_p))
2488 case ADDR_EXPR:
2489 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2490 handled_component_p (*expr_p);
2491 expr_p = &TREE_OPERAND (*expr_p, 0))
2492 continue;
2493 obj = *expr_p;
2494 if (DECL_P (obj))
2495 x = produce_memory_decl_rtl (obj, regno);
2496 break;
2498 case SSA_NAME:
2499 *ws = 0;
2500 obj = SSA_NAME_VAR (*expr_p);
2501 if (!DECL_RTL_SET_P (obj))
2502 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2503 break;
2505 case VAR_DECL:
2506 case PARM_DECL:
2507 case RESULT_DECL:
2508 *ws = 0;
2509 obj = *expr_p;
2511 if (DECL_RTL_SET_P (obj))
2512 break;
2514 if (DECL_MODE (obj) == BLKmode)
2515 x = produce_memory_decl_rtl (obj, regno);
2516 else
2517 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2519 break;
2521 default:
2522 break;
2525 if (x)
2527 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2528 SET_DECL_RTL (obj, x);
2531 return NULL_TREE;
2534 /* Determines cost of the computation of EXPR. */
2536 static unsigned
2537 computation_cost (tree expr)
2539 rtx seq, rslt;
2540 tree type = TREE_TYPE (expr);
2541 unsigned cost;
2542 /* Avoid using hard regs in ways which may be unsupported. */
2543 int regno = LAST_VIRTUAL_REGISTER + 1;
2545 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2546 start_sequence ();
2547 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2548 seq = get_insns ();
2549 end_sequence ();
2551 cost = seq_cost (seq);
2552 if (MEM_P (rslt))
2553 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2555 return cost;
2558 /* Returns variable containing the value of candidate CAND at statement AT. */
2560 static tree
2561 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2563 if (stmt_after_increment (loop, cand, stmt))
2564 return cand->var_after;
2565 else
2566 return cand->var_before;
2569 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2570 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2573 tree_int_cst_sign_bit (tree t)
2575 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2576 unsigned HOST_WIDE_INT w;
2578 if (bitno < HOST_BITS_PER_WIDE_INT)
2579 w = TREE_INT_CST_LOW (t);
2580 else
2582 w = TREE_INT_CST_HIGH (t);
2583 bitno -= HOST_BITS_PER_WIDE_INT;
2586 return (w >> bitno) & 1;
2589 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2590 return cst. Otherwise return NULL_TREE. */
2592 static tree
2593 constant_multiple_of (tree type, tree top, tree bot)
2595 tree res, mby, p0, p1;
2596 enum tree_code code;
2597 bool negate;
2599 STRIP_NOPS (top);
2600 STRIP_NOPS (bot);
2602 if (operand_equal_p (top, bot, 0))
2603 return build_int_cst (type, 1);
2605 code = TREE_CODE (top);
2606 switch (code)
2608 case MULT_EXPR:
2609 mby = TREE_OPERAND (top, 1);
2610 if (TREE_CODE (mby) != INTEGER_CST)
2611 return NULL_TREE;
2613 res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2614 if (!res)
2615 return NULL_TREE;
2617 return fold_binary_to_constant (MULT_EXPR, type, res,
2618 fold_convert (type, mby));
2620 case PLUS_EXPR:
2621 case MINUS_EXPR:
2622 p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2623 if (!p0)
2624 return NULL_TREE;
2625 p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2626 if (!p1)
2627 return NULL_TREE;
2629 return fold_binary_to_constant (code, type, p0, p1);
2631 case INTEGER_CST:
2632 if (TREE_CODE (bot) != INTEGER_CST)
2633 return NULL_TREE;
2635 bot = fold_convert (type, bot);
2636 top = fold_convert (type, top);
2638 /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2639 the result afterwards. */
2640 if (tree_int_cst_sign_bit (bot))
2642 negate = true;
2643 bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2645 else
2646 negate = false;
2648 /* Ditto for TOP. */
2649 if (tree_int_cst_sign_bit (top))
2651 negate = !negate;
2652 top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2655 if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2656 return NULL_TREE;
2658 res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2659 if (negate)
2660 res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2661 return res;
2663 default:
2664 return NULL_TREE;
2668 /* Sets COMB to CST. */
2670 static void
2671 aff_combination_const (struct affine_tree_combination *comb, tree type,
2672 unsigned HOST_WIDE_INT cst)
2674 unsigned prec = TYPE_PRECISION (type);
2676 comb->type = type;
2677 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2679 comb->n = 0;
2680 comb->rest = NULL_TREE;
2681 comb->offset = cst & comb->mask;
2684 /* Sets COMB to single element ELT. */
2686 static void
2687 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2689 unsigned prec = TYPE_PRECISION (type);
2691 comb->type = type;
2692 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2694 comb->n = 1;
2695 comb->elts[0] = elt;
2696 comb->coefs[0] = 1;
2697 comb->rest = NULL_TREE;
2698 comb->offset = 0;
2701 /* Scales COMB by SCALE. */
2703 static void
2704 aff_combination_scale (struct affine_tree_combination *comb,
2705 unsigned HOST_WIDE_INT scale)
2707 unsigned i, j;
2709 if (scale == 1)
2710 return;
2712 if (scale == 0)
2714 aff_combination_const (comb, comb->type, 0);
2715 return;
2718 comb->offset = (scale * comb->offset) & comb->mask;
2719 for (i = 0, j = 0; i < comb->n; i++)
2721 comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2722 comb->elts[j] = comb->elts[i];
2723 if (comb->coefs[j] != 0)
2724 j++;
2726 comb->n = j;
2728 if (comb->rest)
2730 if (comb->n < MAX_AFF_ELTS)
2732 comb->coefs[comb->n] = scale;
2733 comb->elts[comb->n] = comb->rest;
2734 comb->rest = NULL_TREE;
2735 comb->n++;
2737 else
2738 comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2739 build_int_cst_type (comb->type, scale));
2743 /* Adds ELT * SCALE to COMB. */
2745 static void
2746 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2747 unsigned HOST_WIDE_INT scale)
2749 unsigned i;
2751 if (scale == 0)
2752 return;
2754 for (i = 0; i < comb->n; i++)
2755 if (operand_equal_p (comb->elts[i], elt, 0))
2757 comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2758 if (comb->coefs[i])
2759 return;
2761 comb->n--;
2762 comb->coefs[i] = comb->coefs[comb->n];
2763 comb->elts[i] = comb->elts[comb->n];
2764 return;
2766 if (comb->n < MAX_AFF_ELTS)
2768 comb->coefs[comb->n] = scale;
2769 comb->elts[comb->n] = elt;
2770 comb->n++;
2771 return;
2774 if (scale == 1)
2775 elt = fold_convert (comb->type, elt);
2776 else
2777 elt = fold_build2 (MULT_EXPR, comb->type,
2778 fold_convert (comb->type, elt),
2779 build_int_cst_type (comb->type, scale));
2781 if (comb->rest)
2782 comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2783 else
2784 comb->rest = elt;
2787 /* Adds COMB2 to COMB1. */
2789 static void
2790 aff_combination_add (struct affine_tree_combination *comb1,
2791 struct affine_tree_combination *comb2)
2793 unsigned i;
2795 comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2796 for (i = 0; i < comb2-> n; i++)
2797 aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2798 if (comb2->rest)
2799 aff_combination_add_elt (comb1, comb2->rest, 1);
2802 /* Splits EXPR into an affine combination of parts. */
2804 static void
2805 tree_to_aff_combination (tree expr, tree type,
2806 struct affine_tree_combination *comb)
2808 struct affine_tree_combination tmp;
2809 enum tree_code code;
2810 tree cst, core, toffset;
2811 HOST_WIDE_INT bitpos, bitsize;
2812 enum machine_mode mode;
2813 int unsignedp, volatilep;
2815 STRIP_NOPS (expr);
2817 code = TREE_CODE (expr);
2818 switch (code)
2820 case INTEGER_CST:
2821 aff_combination_const (comb, type, int_cst_value (expr));
2822 return;
2824 case PLUS_EXPR:
2825 case MINUS_EXPR:
2826 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2827 tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2828 if (code == MINUS_EXPR)
2829 aff_combination_scale (&tmp, -1);
2830 aff_combination_add (comb, &tmp);
2831 return;
2833 case MULT_EXPR:
2834 cst = TREE_OPERAND (expr, 1);
2835 if (TREE_CODE (cst) != INTEGER_CST)
2836 break;
2837 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2838 aff_combination_scale (comb, int_cst_value (cst));
2839 return;
2841 case NEGATE_EXPR:
2842 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2843 aff_combination_scale (comb, -1);
2844 return;
2846 case ADDR_EXPR:
2847 core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2848 &toffset, &mode, &unsignedp, &volatilep,
2849 false);
2850 if (bitpos % BITS_PER_UNIT != 0)
2851 break;
2852 aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2853 core = build_fold_addr_expr (core);
2854 if (TREE_CODE (core) == ADDR_EXPR)
2855 aff_combination_add_elt (comb, core, 1);
2856 else
2858 tree_to_aff_combination (core, type, &tmp);
2859 aff_combination_add (comb, &tmp);
2861 if (toffset)
2863 tree_to_aff_combination (toffset, type, &tmp);
2864 aff_combination_add (comb, &tmp);
2866 return;
2868 default:
2869 break;
2872 aff_combination_elt (comb, type, expr);
2875 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for width of TYPE. */
2877 static tree
2878 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2879 unsigned HOST_WIDE_INT mask)
2881 enum tree_code code;
2883 scale &= mask;
2884 elt = fold_convert (type, elt);
2886 if (scale == 1)
2888 if (!expr)
2889 return elt;
2891 return fold_build2 (PLUS_EXPR, type, expr, elt);
2894 if (scale == mask)
2896 if (!expr)
2897 return fold_build1 (NEGATE_EXPR, type, elt);
2899 return fold_build2 (MINUS_EXPR, type, expr, elt);
2902 if (!expr)
2903 return fold_build2 (MULT_EXPR, type, elt,
2904 build_int_cst_type (type, scale));
2906 if ((scale | (mask >> 1)) == mask)
2908 /* Scale is negative. */
2909 code = MINUS_EXPR;
2910 scale = (-scale) & mask;
2912 else
2913 code = PLUS_EXPR;
2915 elt = fold_build2 (MULT_EXPR, type, elt,
2916 build_int_cst_type (type, scale));
2917 return fold_build2 (code, type, expr, elt);
2920 /* Copies the tree elements of COMB to ensure that they are not shared. */
2922 static void
2923 unshare_aff_combination (struct affine_tree_combination *comb)
2925 unsigned i;
2927 for (i = 0; i < comb->n; i++)
2928 comb->elts[i] = unshare_expr (comb->elts[i]);
2929 if (comb->rest)
2930 comb->rest = unshare_expr (comb->rest);
2933 /* Makes tree from the affine combination COMB. */
2935 static tree
2936 aff_combination_to_tree (struct affine_tree_combination *comb)
2938 tree type = comb->type;
2939 tree expr = comb->rest;
2940 unsigned i;
2941 unsigned HOST_WIDE_INT off, sgn;
2943 /* Handle the special case produced by get_computation_aff when
2944 the type does not fit in HOST_WIDE_INT. */
2945 if (comb->n == 0 && comb->offset == 0)
2946 return fold_convert (type, expr);
2948 gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2950 for (i = 0; i < comb->n; i++)
2951 expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2952 comb->mask);
2954 if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2956 /* Offset is negative. */
2957 off = (-comb->offset) & comb->mask;
2958 sgn = comb->mask;
2960 else
2962 off = comb->offset;
2963 sgn = 1;
2965 return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2966 comb->mask);
2969 /* Determines the expression by that USE is expressed from induction variable
2970 CAND at statement AT in LOOP. The expression is stored in a decomposed
2971 form into AFF. Returns false if USE cannot be expressed using CAND. */
2973 static bool
2974 get_computation_aff (struct loop *loop,
2975 struct iv_use *use, struct iv_cand *cand, tree at,
2976 struct affine_tree_combination *aff)
2978 tree ubase = use->iv->base;
2979 tree ustep = use->iv->step;
2980 tree cbase = cand->iv->base;
2981 tree cstep = cand->iv->step;
2982 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2983 tree uutype;
2984 tree expr, delta;
2985 tree ratio;
2986 unsigned HOST_WIDE_INT ustepi, cstepi;
2987 HOST_WIDE_INT ratioi;
2988 struct affine_tree_combination cbase_aff, expr_aff;
2990 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2992 /* We do not have a precision to express the values of use. */
2993 return false;
2996 expr = var_at_stmt (loop, cand, at);
2998 if (TREE_TYPE (expr) != ctype)
3000 /* This may happen with the original ivs. */
3001 expr = fold_convert (ctype, expr);
3004 if (TYPE_UNSIGNED (utype))
3005 uutype = utype;
3006 else
3008 uutype = unsigned_type_for (utype);
3009 ubase = fold_convert (uutype, ubase);
3010 ustep = fold_convert (uutype, ustep);
3013 if (uutype != ctype)
3015 expr = fold_convert (uutype, expr);
3016 cbase = fold_convert (uutype, cbase);
3017 cstep = fold_convert (uutype, cstep);
3020 if (cst_and_fits_in_hwi (cstep)
3021 && cst_and_fits_in_hwi (ustep))
3023 ustepi = int_cst_value (ustep);
3024 cstepi = int_cst_value (cstep);
3026 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3028 /* TODO maybe consider case when ustep divides cstep and the ratio is
3029 a power of 2 (so that the division is fast to execute)? We would
3030 need to be much more careful with overflows etc. then. */
3031 return false;
3034 ratio = build_int_cst_type (uutype, ratioi);
3036 else
3038 ratio = constant_multiple_of (uutype, ustep, cstep);
3039 if (!ratio)
3040 return false;
3042 /* Ratioi is only used to detect special cases when the multiplicative
3043 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3044 we may set it to 0. We prefer cst_and_fits_in_hwi/int_cst_value
3045 to integer_onep/integer_all_onesp, since the former ignores
3046 TREE_OVERFLOW. */
3047 if (cst_and_fits_in_hwi (ratio))
3048 ratioi = int_cst_value (ratio);
3049 else if (integer_onep (ratio))
3050 ratioi = 1;
3051 else if (integer_all_onesp (ratio))
3052 ratioi = -1;
3053 else
3054 ratioi = 0;
3057 /* We may need to shift the value if we are after the increment. */
3058 if (stmt_after_increment (loop, cand, at))
3059 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
3061 /* use = ubase - ratio * cbase + ratio * var.
3063 In general case ubase + ratio * (var - cbase) could be better (one less
3064 multiplication), but often it is possible to eliminate redundant parts
3065 of computations from (ubase - ratio * cbase) term, and if it does not
3066 happen, fold is able to apply the distributive law to obtain this form
3067 anyway. */
3069 if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
3071 /* Let's compute in trees and just return the result in AFF. This case
3072 should not be very common, and fold itself is not that bad either,
3073 so making the aff. functions more complicated to handle this case
3074 is not that urgent. */
3075 if (ratioi == 1)
3077 delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
3078 expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3080 else if (ratioi == -1)
3082 delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
3083 expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3085 else
3087 delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
3088 delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
3089 expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3090 expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3093 aff->type = uutype;
3094 aff->n = 0;
3095 aff->offset = 0;
3096 aff->mask = 0;
3097 aff->rest = expr;
3098 return true;
3101 /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3102 possible to compute ratioi. */
3103 gcc_assert (ratioi);
3105 tree_to_aff_combination (ubase, uutype, aff);
3106 tree_to_aff_combination (cbase, uutype, &cbase_aff);
3107 tree_to_aff_combination (expr, uutype, &expr_aff);
3108 aff_combination_scale (&cbase_aff, -ratioi);
3109 aff_combination_scale (&expr_aff, ratioi);
3110 aff_combination_add (aff, &cbase_aff);
3111 aff_combination_add (aff, &expr_aff);
3113 return true;
3116 /* Determines the expression by that USE is expressed from induction variable
3117 CAND at statement AT in LOOP. The computation is unshared. */
3119 static tree
3120 get_computation_at (struct loop *loop,
3121 struct iv_use *use, struct iv_cand *cand, tree at)
3123 struct affine_tree_combination aff;
3124 tree type = TREE_TYPE (use->iv->base);
3126 if (!get_computation_aff (loop, use, cand, at, &aff))
3127 return NULL_TREE;
3128 unshare_aff_combination (&aff);
3129 return fold_convert (type, aff_combination_to_tree (&aff));
3132 /* Determines the expression by that USE is expressed from induction variable
3133 CAND in LOOP. The computation is unshared. */
3135 static tree
3136 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3138 return get_computation_at (loop, use, cand, use->stmt);
3141 /* Returns cost of addition in MODE. */
3143 static unsigned
3144 add_cost (enum machine_mode mode)
3146 static unsigned costs[NUM_MACHINE_MODES];
3147 rtx seq;
3148 unsigned cost;
3150 if (costs[mode])
3151 return costs[mode];
3153 start_sequence ();
3154 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3155 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
3156 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
3157 NULL_RTX);
3158 seq = get_insns ();
3159 end_sequence ();
3161 cost = seq_cost (seq);
3162 if (!cost)
3163 cost = 1;
3165 costs[mode] = cost;
3167 if (dump_file && (dump_flags & TDF_DETAILS))
3168 fprintf (dump_file, "Addition in %s costs %d\n",
3169 GET_MODE_NAME (mode), cost);
3170 return cost;
3173 /* Entry in a hashtable of already known costs for multiplication. */
3174 struct mbc_entry
3176 HOST_WIDE_INT cst; /* The constant to multiply by. */
3177 enum machine_mode mode; /* In mode. */
3178 unsigned cost; /* The cost. */
3181 /* Counts hash value for the ENTRY. */
3183 static hashval_t
3184 mbc_entry_hash (const void *entry)
3186 const struct mbc_entry *e = entry;
3188 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3191 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3193 static int
3194 mbc_entry_eq (const void *entry1, const void *entry2)
3196 const struct mbc_entry *e1 = entry1;
3197 const struct mbc_entry *e2 = entry2;
3199 return (e1->mode == e2->mode
3200 && e1->cst == e2->cst);
3203 /* Returns cost of multiplication by constant CST in MODE. */
3205 unsigned
3206 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3208 static htab_t costs;
3209 struct mbc_entry **cached, act;
3210 rtx seq;
3211 unsigned cost;
3213 if (!costs)
3214 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3216 act.mode = mode;
3217 act.cst = cst;
3218 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3219 if (*cached)
3220 return (*cached)->cost;
3222 *cached = xmalloc (sizeof (struct mbc_entry));
3223 (*cached)->mode = mode;
3224 (*cached)->cst = cst;
3226 start_sequence ();
3227 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
3228 NULL_RTX, 0);
3229 seq = get_insns ();
3230 end_sequence ();
3232 cost = seq_cost (seq);
3234 if (dump_file && (dump_flags & TDF_DETAILS))
3235 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3236 (int) cst, GET_MODE_NAME (mode), cost);
3238 (*cached)->cost = cost;
3240 return cost;
3243 /* Returns true if multiplying by RATIO is allowed in address. */
3245 bool
3246 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3248 #define MAX_RATIO 128
3249 static sbitmap valid_mult;
3251 if (!valid_mult)
3253 rtx reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
3254 rtx addr;
3255 HOST_WIDE_INT i;
3257 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3258 sbitmap_zero (valid_mult);
3259 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3260 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3262 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3263 if (memory_address_p (Pmode, addr))
3264 SET_BIT (valid_mult, i + MAX_RATIO);
3267 if (dump_file && (dump_flags & TDF_DETAILS))
3269 fprintf (dump_file, " allowed multipliers:");
3270 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3271 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3272 fprintf (dump_file, " %d", (int) i);
3273 fprintf (dump_file, "\n");
3274 fprintf (dump_file, "\n");
3278 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3279 return false;
3281 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3284 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3285 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3286 variable is omitted. The created memory accesses MODE.
3288 TODO -- there must be some better way. This all is quite crude. */
3290 static unsigned
3291 get_address_cost (bool symbol_present, bool var_present,
3292 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3294 static bool initialized = false;
3295 static HOST_WIDE_INT rat, off;
3296 static HOST_WIDE_INT min_offset, max_offset;
3297 static unsigned costs[2][2][2][2];
3298 unsigned cost, acost;
3299 rtx seq, addr, base;
3300 bool offset_p, ratio_p;
3301 rtx reg1;
3302 HOST_WIDE_INT s_offset;
3303 unsigned HOST_WIDE_INT mask;
3304 unsigned bits;
3306 if (!initialized)
3308 HOST_WIDE_INT i;
3309 initialized = true;
3311 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
3313 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3314 for (i = 1; i <= 1 << 20; i <<= 1)
3316 XEXP (addr, 1) = GEN_INT (i);
3317 if (!memory_address_p (Pmode, addr))
3318 break;
3320 max_offset = i >> 1;
3321 off = max_offset;
3323 for (i = 1; i <= 1 << 20; i <<= 1)
3325 XEXP (addr, 1) = GEN_INT (-i);
3326 if (!memory_address_p (Pmode, addr))
3327 break;
3329 min_offset = -(i >> 1);
3331 if (dump_file && (dump_flags & TDF_DETAILS))
3333 fprintf (dump_file, "get_address_cost:\n");
3334 fprintf (dump_file, " min offset %d\n", (int) min_offset);
3335 fprintf (dump_file, " max offset %d\n", (int) max_offset);
3338 rat = 1;
3339 for (i = 2; i <= MAX_RATIO; i++)
3340 if (multiplier_allowed_in_address_p (i))
3342 rat = i;
3343 break;
3347 bits = GET_MODE_BITSIZE (Pmode);
3348 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3349 offset &= mask;
3350 if ((offset >> (bits - 1) & 1))
3351 offset |= ~mask;
3352 s_offset = offset;
3354 cost = 0;
3355 offset_p = (s_offset != 0
3356 && min_offset <= s_offset && s_offset <= max_offset);
3357 ratio_p = (ratio != 1
3358 && multiplier_allowed_in_address_p (ratio));
3360 if (ratio != 1 && !ratio_p)
3361 cost += multiply_by_cost (ratio, Pmode);
3363 if (s_offset && !offset_p && !symbol_present)
3365 cost += add_cost (Pmode);
3366 var_present = true;
3369 acost = costs[symbol_present][var_present][offset_p][ratio_p];
3370 if (!acost)
3372 acost = 0;
3374 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
3375 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
3376 if (ratio_p)
3377 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
3379 if (var_present)
3380 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3382 if (symbol_present)
3384 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3385 if (offset_p)
3386 base = gen_rtx_fmt_e (CONST, Pmode,
3387 gen_rtx_fmt_ee (PLUS, Pmode,
3388 base,
3389 GEN_INT (off)));
3391 else if (offset_p)
3392 base = GEN_INT (off);
3393 else
3394 base = NULL_RTX;
3396 if (base)
3397 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3399 start_sequence ();
3400 addr = memory_address (Pmode, addr);
3401 seq = get_insns ();
3402 end_sequence ();
3404 acost = seq_cost (seq);
3405 acost += address_cost (addr, Pmode);
3407 if (!acost)
3408 acost = 1;
3409 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3412 return cost + acost;
3414 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3415 invariants the computation depends on. */
3417 static unsigned
3418 force_var_cost (struct ivopts_data *data,
3419 tree expr, bitmap *depends_on)
3421 static bool costs_initialized = false;
3422 static unsigned integer_cost;
3423 static unsigned symbol_cost;
3424 static unsigned address_cost;
3425 tree op0, op1;
3426 unsigned cost0, cost1, cost;
3427 enum machine_mode mode;
3429 if (!costs_initialized)
3431 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3432 rtx x = gen_rtx_MEM (DECL_MODE (var),
3433 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3434 tree addr;
3435 tree type = build_pointer_type (integer_type_node);
3437 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
3438 2000));
3440 SET_DECL_RTL (var, x);
3441 TREE_STATIC (var) = 1;
3442 addr = build1 (ADDR_EXPR, type, var);
3443 symbol_cost = computation_cost (addr) + 1;
3445 address_cost
3446 = computation_cost (build2 (PLUS_EXPR, type,
3447 addr,
3448 build_int_cst_type (type, 2000))) + 1;
3449 if (dump_file && (dump_flags & TDF_DETAILS))
3451 fprintf (dump_file, "force_var_cost:\n");
3452 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3453 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3454 fprintf (dump_file, " address %d\n", (int) address_cost);
3455 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3456 fprintf (dump_file, "\n");
3459 costs_initialized = true;
3462 STRIP_NOPS (expr);
3464 if (depends_on)
3466 fd_ivopts_data = data;
3467 walk_tree (&expr, find_depends, depends_on, NULL);
3470 if (SSA_VAR_P (expr))
3471 return 0;
3473 if (TREE_INVARIANT (expr))
3475 if (TREE_CODE (expr) == INTEGER_CST)
3476 return integer_cost;
3478 if (TREE_CODE (expr) == ADDR_EXPR)
3480 tree obj = TREE_OPERAND (expr, 0);
3482 if (TREE_CODE (obj) == VAR_DECL
3483 || TREE_CODE (obj) == PARM_DECL
3484 || TREE_CODE (obj) == RESULT_DECL)
3485 return symbol_cost;
3488 return address_cost;
3491 switch (TREE_CODE (expr))
3493 case PLUS_EXPR:
3494 case MINUS_EXPR:
3495 case MULT_EXPR:
3496 op0 = TREE_OPERAND (expr, 0);
3497 op1 = TREE_OPERAND (expr, 1);
3498 STRIP_NOPS (op0);
3499 STRIP_NOPS (op1);
3501 if (is_gimple_val (op0))
3502 cost0 = 0;
3503 else
3504 cost0 = force_var_cost (data, op0, NULL);
3506 if (is_gimple_val (op1))
3507 cost1 = 0;
3508 else
3509 cost1 = force_var_cost (data, op1, NULL);
3511 break;
3513 default:
3514 /* Just an arbitrary value, FIXME. */
3515 return target_spill_cost;
3518 mode = TYPE_MODE (TREE_TYPE (expr));
3519 switch (TREE_CODE (expr))
3521 case PLUS_EXPR:
3522 case MINUS_EXPR:
3523 cost = add_cost (mode);
3524 break;
3526 case MULT_EXPR:
3527 if (cst_and_fits_in_hwi (op0))
3528 cost = multiply_by_cost (int_cst_value (op0), mode);
3529 else if (cst_and_fits_in_hwi (op1))
3530 cost = multiply_by_cost (int_cst_value (op1), mode);
3531 else
3532 return target_spill_cost;
3533 break;
3535 default:
3536 gcc_unreachable ();
3539 cost += cost0;
3540 cost += cost1;
3542 /* Bound the cost by target_spill_cost. The parts of complicated
3543 computations often are either loop invariant or at least can
3544 be shared between several iv uses, so letting this grow without
3545 limits would not give reasonable results. */
3546 return cost < target_spill_cost ? cost : target_spill_cost;
3549 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3550 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3551 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3552 invariants the computation depends on. */
3554 static unsigned
3555 split_address_cost (struct ivopts_data *data,
3556 tree addr, bool *symbol_present, bool *var_present,
3557 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3559 tree core;
3560 HOST_WIDE_INT bitsize;
3561 HOST_WIDE_INT bitpos;
3562 tree toffset;
3563 enum machine_mode mode;
3564 int unsignedp, volatilep;
3566 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3567 &unsignedp, &volatilep, false);
3569 if (toffset != 0
3570 || bitpos % BITS_PER_UNIT != 0
3571 || TREE_CODE (core) != VAR_DECL)
3573 *symbol_present = false;
3574 *var_present = true;
3575 fd_ivopts_data = data;
3576 walk_tree (&addr, find_depends, depends_on, NULL);
3577 return target_spill_cost;
3580 *offset += bitpos / BITS_PER_UNIT;
3581 if (TREE_STATIC (core)
3582 || DECL_EXTERNAL (core))
3584 *symbol_present = true;
3585 *var_present = false;
3586 return 0;
3589 *symbol_present = false;
3590 *var_present = true;
3591 return 0;
3594 /* Estimates cost of expressing difference of addresses E1 - E2 as
3595 var + symbol + offset. The value of offset is added to OFFSET,
3596 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3597 part is missing. DEPENDS_ON is a set of the invariants the computation
3598 depends on. */
3600 static unsigned
3601 ptr_difference_cost (struct ivopts_data *data,
3602 tree e1, tree e2, bool *symbol_present, bool *var_present,
3603 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3605 HOST_WIDE_INT diff = 0;
3606 unsigned cost;
3608 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3610 if (ptr_difference_const (e1, e2, &diff))
3612 *offset += diff;
3613 *symbol_present = false;
3614 *var_present = false;
3615 return 0;
3618 if (e2 == integer_zero_node)
3619 return split_address_cost (data, TREE_OPERAND (e1, 0),
3620 symbol_present, var_present, offset, depends_on);
3622 *symbol_present = false;
3623 *var_present = true;
3625 cost = force_var_cost (data, e1, depends_on);
3626 cost += force_var_cost (data, e2, depends_on);
3627 cost += add_cost (Pmode);
3629 return cost;
3632 /* Estimates cost of expressing difference E1 - E2 as
3633 var + symbol + offset. The value of offset is added to OFFSET,
3634 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3635 part is missing. DEPENDS_ON is a set of the invariants the computation
3636 depends on. */
3638 static unsigned
3639 difference_cost (struct ivopts_data *data,
3640 tree e1, tree e2, bool *symbol_present, bool *var_present,
3641 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3643 unsigned cost;
3644 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3645 unsigned HOST_WIDE_INT off1, off2;
3647 e1 = strip_offset (e1, &off1);
3648 e2 = strip_offset (e2, &off2);
3649 *offset += off1 - off2;
3651 STRIP_NOPS (e1);
3652 STRIP_NOPS (e2);
3654 if (TREE_CODE (e1) == ADDR_EXPR)
3655 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3656 depends_on);
3657 *symbol_present = false;
3659 if (operand_equal_p (e1, e2, 0))
3661 *var_present = false;
3662 return 0;
3664 *var_present = true;
3665 if (zero_p (e2))
3666 return force_var_cost (data, e1, depends_on);
3668 if (zero_p (e1))
3670 cost = force_var_cost (data, e2, depends_on);
3671 cost += multiply_by_cost (-1, mode);
3673 return cost;
3676 cost = force_var_cost (data, e1, depends_on);
3677 cost += force_var_cost (data, e2, depends_on);
3678 cost += add_cost (mode);
3680 return cost;
3683 /* Determines the cost of the computation by that USE is expressed
3684 from induction variable CAND. If ADDRESS_P is true, we just need
3685 to create an address from it, otherwise we want to get it into
3686 register. A set of invariants we depend on is stored in
3687 DEPENDS_ON. AT is the statement at that the value is computed. */
3689 static unsigned
3690 get_computation_cost_at (struct ivopts_data *data,
3691 struct iv_use *use, struct iv_cand *cand,
3692 bool address_p, bitmap *depends_on, tree at)
3694 tree ubase = use->iv->base, ustep = use->iv->step;
3695 tree cbase, cstep;
3696 tree utype = TREE_TYPE (ubase), ctype;
3697 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3698 HOST_WIDE_INT ratio, aratio;
3699 bool var_present, symbol_present;
3700 unsigned cost = 0, n_sums;
3702 *depends_on = NULL;
3704 /* Only consider real candidates. */
3705 if (!cand->iv)
3706 return INFTY;
3708 cbase = cand->iv->base;
3709 cstep = cand->iv->step;
3710 ctype = TREE_TYPE (cbase);
3712 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3714 /* We do not have a precision to express the values of use. */
3715 return INFTY;
3718 if (address_p)
3720 /* Do not try to express address of an object with computation based
3721 on address of a different object. This may cause problems in rtl
3722 level alias analysis (that does not expect this to be happening,
3723 as this is illegal in C), and would be unlikely to be useful
3724 anyway. */
3725 if (use->iv->base_object
3726 && cand->iv->base_object
3727 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3728 return INFTY;
3731 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3733 /* TODO -- add direct handling of this case. */
3734 goto fallback;
3737 /* CSTEPI is removed from the offset in case statement is after the
3738 increment. If the step is not constant, we use zero instead.
3739 This is a bit imprecise (there is the extra addition), but
3740 redundancy elimination is likely to transform the code so that
3741 it uses value of the variable before increment anyway,
3742 so it is not that much unrealistic. */
3743 if (cst_and_fits_in_hwi (cstep))
3744 cstepi = int_cst_value (cstep);
3745 else
3746 cstepi = 0;
3748 if (cst_and_fits_in_hwi (ustep)
3749 && cst_and_fits_in_hwi (cstep))
3751 ustepi = int_cst_value (ustep);
3753 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3754 return INFTY;
3756 else
3758 tree rat;
3760 rat = constant_multiple_of (utype, ustep, cstep);
3762 if (!rat)
3763 return INFTY;
3765 if (cst_and_fits_in_hwi (rat))
3766 ratio = int_cst_value (rat);
3767 else if (integer_onep (rat))
3768 ratio = 1;
3769 else if (integer_all_onesp (rat))
3770 ratio = -1;
3771 else
3772 return INFTY;
3775 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3776 or ratio == 1, it is better to handle this like
3778 ubase - ratio * cbase + ratio * var
3780 (also holds in the case ratio == -1, TODO. */
3782 if (cst_and_fits_in_hwi (cbase))
3784 offset = - ratio * int_cst_value (cbase);
3785 cost += difference_cost (data,
3786 ubase, integer_zero_node,
3787 &symbol_present, &var_present, &offset,
3788 depends_on);
3790 else if (ratio == 1)
3792 cost += difference_cost (data,
3793 ubase, cbase,
3794 &symbol_present, &var_present, &offset,
3795 depends_on);
3797 else
3799 cost += force_var_cost (data, cbase, depends_on);
3800 cost += add_cost (TYPE_MODE (ctype));
3801 cost += difference_cost (data,
3802 ubase, integer_zero_node,
3803 &symbol_present, &var_present, &offset,
3804 depends_on);
3807 /* If we are after the increment, the value of the candidate is higher by
3808 one iteration. */
3809 if (stmt_after_increment (data->current_loop, cand, at))
3810 offset -= ratio * cstepi;
3812 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3813 (symbol/var/const parts may be omitted). If we are looking for an address,
3814 find the cost of addressing this. */
3815 if (address_p)
3816 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3818 /* Otherwise estimate the costs for computing the expression. */
3819 aratio = ratio > 0 ? ratio : -ratio;
3820 if (!symbol_present && !var_present && !offset)
3822 if (ratio != 1)
3823 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3825 return cost;
3828 if (aratio != 1)
3829 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3831 n_sums = 1;
3832 if (var_present
3833 /* Symbol + offset should be compile-time computable. */
3834 && (symbol_present || offset))
3835 n_sums++;
3837 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3839 fallback:
3841 /* Just get the expression, expand it and measure the cost. */
3842 tree comp = get_computation_at (data->current_loop, use, cand, at);
3844 if (!comp)
3845 return INFTY;
3847 if (address_p)
3848 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3850 return computation_cost (comp);
3854 /* Determines the cost of the computation by that USE is expressed
3855 from induction variable CAND. If ADDRESS_P is true, we just need
3856 to create an address from it, otherwise we want to get it into
3857 register. A set of invariants we depend on is stored in
3858 DEPENDS_ON. */
3860 static unsigned
3861 get_computation_cost (struct ivopts_data *data,
3862 struct iv_use *use, struct iv_cand *cand,
3863 bool address_p, bitmap *depends_on)
3865 return get_computation_cost_at (data,
3866 use, cand, address_p, depends_on, use->stmt);
3869 /* Determines cost of basing replacement of USE on CAND in a generic
3870 expression. */
3872 static bool
3873 determine_use_iv_cost_generic (struct ivopts_data *data,
3874 struct iv_use *use, struct iv_cand *cand)
3876 bitmap depends_on;
3877 unsigned cost;
3879 /* The simple case first -- if we need to express value of the preserved
3880 original biv, the cost is 0. This also prevents us from counting the
3881 cost of increment twice -- once at this use and once in the cost of
3882 the candidate. */
3883 if (cand->pos == IP_ORIGINAL
3884 && cand->incremented_at == use->stmt)
3886 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3887 return true;
3890 cost = get_computation_cost (data, use, cand, false, &depends_on);
3891 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3893 return cost != INFTY;
3896 /* Determines cost of basing replacement of USE on CAND in an address. */
3898 static bool
3899 determine_use_iv_cost_address (struct ivopts_data *data,
3900 struct iv_use *use, struct iv_cand *cand)
3902 bitmap depends_on;
3903 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3905 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3907 return cost != INFTY;
3910 /* Computes value of induction variable IV in iteration NITER. */
3912 static tree
3913 iv_value (struct iv *iv, tree niter)
3915 tree val;
3916 tree type = TREE_TYPE (iv->base);
3918 niter = fold_convert (type, niter);
3919 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3921 return fold (build2 (PLUS_EXPR, type, iv->base, val));
3924 /* Computes value of candidate CAND at position AT in iteration NITER. */
3926 static tree
3927 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3929 tree val = iv_value (cand->iv, niter);
3930 tree type = TREE_TYPE (cand->iv->base);
3932 if (stmt_after_increment (loop, cand, at))
3933 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3935 return val;
3938 /* Returns period of induction variable iv. */
3940 static tree
3941 iv_period (struct iv *iv)
3943 tree step = iv->step, period, type;
3944 tree pow2div;
3946 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3948 /* Period of the iv is gcd (step, type range). Since type range is power
3949 of two, it suffices to determine the maximum power of two that divides
3950 step. */
3951 pow2div = num_ending_zeros (step);
3952 type = unsigned_type_for (TREE_TYPE (step));
3954 period = build_low_bits_mask (type,
3955 (TYPE_PRECISION (type)
3956 - tree_low_cst (pow2div, 1)));
3958 return period;
3961 /* Returns the comparison operator used when eliminating the iv USE. */
3963 static enum tree_code
3964 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3966 struct loop *loop = data->current_loop;
3967 basic_block ex_bb;
3968 edge exit;
3970 ex_bb = bb_for_stmt (use->stmt);
3971 exit = EDGE_SUCC (ex_bb, 0);
3972 if (flow_bb_inside_loop_p (loop, exit->dest))
3973 exit = EDGE_SUCC (ex_bb, 1);
3975 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3978 /* Check whether it is possible to express the condition in USE by comparison
3979 of candidate CAND. If so, store the value compared with to BOUND. */
3981 static bool
3982 may_eliminate_iv (struct ivopts_data *data,
3983 struct iv_use *use, struct iv_cand *cand, tree *bound)
3985 basic_block ex_bb;
3986 edge exit;
3987 struct tree_niter_desc *niter;
3988 tree nit, nit_type;
3989 tree wider_type, period, per_type;
3990 struct loop *loop = data->current_loop;
3992 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3993 return false;
3995 /* For now works only for exits that dominate the loop latch. TODO -- extend
3996 for other conditions inside loop body. */
3997 ex_bb = bb_for_stmt (use->stmt);
3998 if (use->stmt != last_stmt (ex_bb)
3999 || TREE_CODE (use->stmt) != COND_EXPR)
4000 return false;
4001 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4002 return false;
4004 exit = EDGE_SUCC (ex_bb, 0);
4005 if (flow_bb_inside_loop_p (loop, exit->dest))
4006 exit = EDGE_SUCC (ex_bb, 1);
4007 if (flow_bb_inside_loop_p (loop, exit->dest))
4008 return false;
4010 niter = niter_for_exit (data, exit);
4011 if (!niter
4012 || !zero_p (niter->may_be_zero))
4013 return false;
4015 nit = niter->niter;
4016 nit_type = TREE_TYPE (nit);
4018 /* Determine whether we may use the variable to test whether niter iterations
4019 elapsed. This is the case iff the period of the induction variable is
4020 greater than the number of iterations. */
4021 period = iv_period (cand->iv);
4022 if (!period)
4023 return false;
4024 per_type = TREE_TYPE (period);
4026 wider_type = TREE_TYPE (period);
4027 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4028 wider_type = per_type;
4029 else
4030 wider_type = nit_type;
4032 if (!integer_nonzerop (fold (build2 (GE_EXPR, boolean_type_node,
4033 fold_convert (wider_type, period),
4034 fold_convert (wider_type, nit)))))
4035 return false;
4037 *bound = cand_value_at (loop, cand, use->stmt, nit);
4038 return true;
4041 /* Determines cost of basing replacement of USE on CAND in a condition. */
4043 static bool
4044 determine_use_iv_cost_condition (struct ivopts_data *data,
4045 struct iv_use *use, struct iv_cand *cand)
4047 tree bound = NULL_TREE, op, cond;
4048 bitmap depends_on = NULL;
4049 unsigned cost;
4051 /* Only consider real candidates. */
4052 if (!cand->iv)
4054 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4055 return false;
4058 if (may_eliminate_iv (data, use, cand, &bound))
4060 cost = force_var_cost (data, bound, &depends_on);
4062 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4063 return cost != INFTY;
4066 /* The induction variable elimination failed; just express the original
4067 giv. If it is compared with an invariant, note that we cannot get
4068 rid of it. */
4069 cost = get_computation_cost (data, use, cand, false, &depends_on);
4071 cond = *use->op_p;
4072 if (TREE_CODE (cond) != SSA_NAME)
4074 op = TREE_OPERAND (cond, 0);
4075 if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4076 op = TREE_OPERAND (cond, 1);
4077 if (TREE_CODE (op) == SSA_NAME)
4079 op = get_iv (data, op)->base;
4080 fd_ivopts_data = data;
4081 walk_tree (&op, find_depends, &depends_on, NULL);
4085 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4086 return cost != INFTY;
4089 /* Checks whether it is possible to replace the final value of USE by
4090 a direct computation. If so, the formula is stored to *VALUE. */
4092 static bool
4093 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
4094 tree *value)
4096 struct loop *loop = data->current_loop;
4097 edge exit;
4098 struct tree_niter_desc *niter;
4100 exit = single_dom_exit (loop);
4101 if (!exit)
4102 return false;
4104 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
4105 bb_for_stmt (use->stmt)));
4107 niter = niter_for_single_dom_exit (data);
4108 if (!niter
4109 || !zero_p (niter->may_be_zero))
4110 return false;
4112 *value = iv_value (use->iv, niter->niter);
4114 return true;
4117 /* Determines cost of replacing final value of USE using CAND. */
4119 static bool
4120 determine_use_iv_cost_outer (struct ivopts_data *data,
4121 struct iv_use *use, struct iv_cand *cand)
4123 bitmap depends_on;
4124 unsigned cost;
4125 edge exit;
4126 tree value = NULL_TREE;
4127 struct loop *loop = data->current_loop;
4129 /* The simple case first -- if we need to express value of the preserved
4130 original biv, the cost is 0. This also prevents us from counting the
4131 cost of increment twice -- once at this use and once in the cost of
4132 the candidate. */
4133 if (cand->pos == IP_ORIGINAL
4134 && cand->incremented_at == use->stmt)
4136 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4137 return true;
4140 if (!cand->iv)
4142 if (!may_replace_final_value (data, use, &value))
4144 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4145 return false;
4148 depends_on = NULL;
4149 cost = force_var_cost (data, value, &depends_on);
4151 cost /= AVG_LOOP_NITER (loop);
4153 set_use_iv_cost (data, use, cand, cost, depends_on, value);
4154 return cost != INFTY;
4157 exit = single_dom_exit (loop);
4158 if (exit)
4160 /* If there is just a single exit, we may use value of the candidate
4161 after we take it to determine the value of use. */
4162 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
4163 last_stmt (exit->src));
4164 if (cost != INFTY)
4165 cost /= AVG_LOOP_NITER (loop);
4167 else
4169 /* Otherwise we just need to compute the iv. */
4170 cost = get_computation_cost (data, use, cand, false, &depends_on);
4173 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4175 return cost != INFTY;
4178 /* Determines cost of basing replacement of USE on CAND. Returns false
4179 if USE cannot be based on CAND. */
4181 static bool
4182 determine_use_iv_cost (struct ivopts_data *data,
4183 struct iv_use *use, struct iv_cand *cand)
4185 switch (use->type)
4187 case USE_NONLINEAR_EXPR:
4188 return determine_use_iv_cost_generic (data, use, cand);
4190 case USE_OUTER:
4191 return determine_use_iv_cost_outer (data, use, cand);
4193 case USE_ADDRESS:
4194 return determine_use_iv_cost_address (data, use, cand);
4196 case USE_COMPARE:
4197 return determine_use_iv_cost_condition (data, use, cand);
4199 default:
4200 gcc_unreachable ();
4204 /* Determines costs of basing the use of the iv on an iv candidate. */
4206 static void
4207 determine_use_iv_costs (struct ivopts_data *data)
4209 unsigned i, j;
4210 struct iv_use *use;
4211 struct iv_cand *cand;
4212 bitmap to_clear = BITMAP_ALLOC (NULL);
4214 alloc_use_cost_map (data);
4216 for (i = 0; i < n_iv_uses (data); i++)
4218 use = iv_use (data, i);
4220 if (data->consider_all_candidates)
4222 for (j = 0; j < n_iv_cands (data); j++)
4224 cand = iv_cand (data, j);
4225 determine_use_iv_cost (data, use, cand);
4228 else
4230 bitmap_iterator bi;
4232 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4234 cand = iv_cand (data, j);
4235 if (!determine_use_iv_cost (data, use, cand))
4236 bitmap_set_bit (to_clear, j);
4239 /* Remove the candidates for that the cost is infinite from
4240 the list of related candidates. */
4241 bitmap_and_compl_into (use->related_cands, to_clear);
4242 bitmap_clear (to_clear);
4246 BITMAP_FREE (to_clear);
4248 if (dump_file && (dump_flags & TDF_DETAILS))
4250 fprintf (dump_file, "Use-candidate costs:\n");
4252 for (i = 0; i < n_iv_uses (data); i++)
4254 use = iv_use (data, i);
4256 fprintf (dump_file, "Use %d:\n", i);
4257 fprintf (dump_file, " cand\tcost\tdepends on\n");
4258 for (j = 0; j < use->n_map_members; j++)
4260 if (!use->cost_map[j].cand
4261 || use->cost_map[j].cost == INFTY)
4262 continue;
4264 fprintf (dump_file, " %d\t%d\t",
4265 use->cost_map[j].cand->id,
4266 use->cost_map[j].cost);
4267 if (use->cost_map[j].depends_on)
4268 bitmap_print (dump_file,
4269 use->cost_map[j].depends_on, "","");
4270 fprintf (dump_file, "\n");
4273 fprintf (dump_file, "\n");
4275 fprintf (dump_file, "\n");
4279 /* Determines cost of the candidate CAND. */
4281 static void
4282 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4284 unsigned cost_base, cost_step;
4285 tree base;
4287 if (!cand->iv)
4289 cand->cost = 0;
4290 return;
4293 /* There are two costs associated with the candidate -- its increment
4294 and its initialization. The second is almost negligible for any loop
4295 that rolls enough, so we take it just very little into account. */
4297 base = cand->iv->base;
4298 cost_base = force_var_cost (data, base, NULL);
4299 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4301 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4303 /* Prefer the original iv unless we may gain something by replacing it;
4304 this is not really relevant for artificial ivs created by other
4305 passes. */
4306 if (cand->pos == IP_ORIGINAL
4307 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4308 cand->cost--;
4310 /* Prefer not to insert statements into latch unless there are some
4311 already (so that we do not create unnecessary jumps). */
4312 if (cand->pos == IP_END
4313 && empty_block_p (ip_end_pos (data->current_loop)))
4314 cand->cost++;
4317 /* Determines costs of computation of the candidates. */
4319 static void
4320 determine_iv_costs (struct ivopts_data *data)
4322 unsigned i;
4324 if (dump_file && (dump_flags & TDF_DETAILS))
4326 fprintf (dump_file, "Candidate costs:\n");
4327 fprintf (dump_file, " cand\tcost\n");
4330 for (i = 0; i < n_iv_cands (data); i++)
4332 struct iv_cand *cand = iv_cand (data, i);
4334 determine_iv_cost (data, cand);
4336 if (dump_file && (dump_flags & TDF_DETAILS))
4337 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4340 if (dump_file && (dump_flags & TDF_DETAILS))
4341 fprintf (dump_file, "\n");
4344 /* Calculates cost for having SIZE induction variables. */
4346 static unsigned
4347 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4349 return global_cost_for_size (size,
4350 loop_data (data->current_loop)->regs_used,
4351 n_iv_uses (data));
4354 /* For each size of the induction variable set determine the penalty. */
4356 static void
4357 determine_set_costs (struct ivopts_data *data)
4359 unsigned j, n;
4360 tree phi, op;
4361 struct loop *loop = data->current_loop;
4362 bitmap_iterator bi;
4364 /* We use the following model (definitely improvable, especially the
4365 cost function -- TODO):
4367 We estimate the number of registers available (using MD data), name it A.
4369 We estimate the number of registers used by the loop, name it U. This
4370 number is obtained as the number of loop phi nodes (not counting virtual
4371 registers and bivs) + the number of variables from outside of the loop.
4373 We set a reserve R (free regs that are used for temporary computations,
4374 etc.). For now the reserve is a constant 3.
4376 Let I be the number of induction variables.
4378 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4379 make a lot of ivs without a reason).
4380 -- if A - R < U + I <= A, the cost is I * PRES_COST
4381 -- if U + I > A, the cost is I * PRES_COST and
4382 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4384 if (dump_file && (dump_flags & TDF_DETAILS))
4386 fprintf (dump_file, "Global costs:\n");
4387 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4388 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
4389 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
4390 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
4393 n = 0;
4394 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4396 op = PHI_RESULT (phi);
4398 if (!is_gimple_reg (op))
4399 continue;
4401 if (get_iv (data, op))
4402 continue;
4404 n++;
4407 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4409 struct version_info *info = ver_info (data, j);
4411 if (info->inv_id && info->has_nonlin_use)
4412 n++;
4415 loop_data (loop)->regs_used = n;
4416 if (dump_file && (dump_flags & TDF_DETAILS))
4417 fprintf (dump_file, " regs_used %d\n", n);
4419 if (dump_file && (dump_flags & TDF_DETAILS))
4421 fprintf (dump_file, " cost for size:\n");
4422 fprintf (dump_file, " ivs\tcost\n");
4423 for (j = 0; j <= 2 * target_avail_regs; j++)
4424 fprintf (dump_file, " %d\t%d\n", j,
4425 ivopts_global_cost_for_size (data, j));
4426 fprintf (dump_file, "\n");
4430 /* Returns true if A is a cheaper cost pair than B. */
4432 static bool
4433 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4435 if (!a)
4436 return false;
4438 if (!b)
4439 return true;
4441 if (a->cost < b->cost)
4442 return true;
4444 if (a->cost > b->cost)
4445 return false;
4447 /* In case the costs are the same, prefer the cheaper candidate. */
4448 if (a->cand->cost < b->cand->cost)
4449 return true;
4451 return false;
4454 /* Computes the cost field of IVS structure. */
4456 static void
4457 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4459 unsigned cost = 0;
4461 cost += ivs->cand_use_cost;
4462 cost += ivs->cand_cost;
4463 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4465 ivs->cost = cost;
4468 /* Remove invariants in set INVS to set IVS. */
4470 static void
4471 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4473 bitmap_iterator bi;
4474 unsigned iid;
4476 if (!invs)
4477 return;
4479 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4481 ivs->n_invariant_uses[iid]--;
4482 if (ivs->n_invariant_uses[iid] == 0)
4483 ivs->n_regs--;
4487 /* Set USE not to be expressed by any candidate in IVS. */
4489 static void
4490 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4491 struct iv_use *use)
4493 unsigned uid = use->id, cid;
4494 struct cost_pair *cp;
4496 cp = ivs->cand_for_use[uid];
4497 if (!cp)
4498 return;
4499 cid = cp->cand->id;
4501 ivs->bad_uses++;
4502 ivs->cand_for_use[uid] = NULL;
4503 ivs->n_cand_uses[cid]--;
4505 if (ivs->n_cand_uses[cid] == 0)
4507 bitmap_clear_bit (ivs->cands, cid);
4508 /* Do not count the pseudocandidates. */
4509 if (cp->cand->iv)
4510 ivs->n_regs--;
4511 ivs->n_cands--;
4512 ivs->cand_cost -= cp->cand->cost;
4514 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4517 ivs->cand_use_cost -= cp->cost;
4519 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4520 iv_ca_recount_cost (data, ivs);
4523 /* Add invariants in set INVS to set IVS. */
4525 static void
4526 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4528 bitmap_iterator bi;
4529 unsigned iid;
4531 if (!invs)
4532 return;
4534 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4536 ivs->n_invariant_uses[iid]++;
4537 if (ivs->n_invariant_uses[iid] == 1)
4538 ivs->n_regs++;
4542 /* Set cost pair for USE in set IVS to CP. */
4544 static void
4545 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4546 struct iv_use *use, struct cost_pair *cp)
4548 unsigned uid = use->id, cid;
4550 if (ivs->cand_for_use[uid] == cp)
4551 return;
4553 if (ivs->cand_for_use[uid])
4554 iv_ca_set_no_cp (data, ivs, use);
4556 if (cp)
4558 cid = cp->cand->id;
4560 ivs->bad_uses--;
4561 ivs->cand_for_use[uid] = cp;
4562 ivs->n_cand_uses[cid]++;
4563 if (ivs->n_cand_uses[cid] == 1)
4565 bitmap_set_bit (ivs->cands, cid);
4566 /* Do not count the pseudocandidates. */
4567 if (cp->cand->iv)
4568 ivs->n_regs++;
4569 ivs->n_cands++;
4570 ivs->cand_cost += cp->cand->cost;
4572 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4575 ivs->cand_use_cost += cp->cost;
4576 iv_ca_set_add_invariants (ivs, cp->depends_on);
4577 iv_ca_recount_cost (data, ivs);
4581 /* Extend set IVS by expressing USE by some of the candidates in it
4582 if possible. */
4584 static void
4585 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4586 struct iv_use *use)
4588 struct cost_pair *best_cp = NULL, *cp;
4589 bitmap_iterator bi;
4590 unsigned i;
4592 gcc_assert (ivs->upto >= use->id);
4594 if (ivs->upto == use->id)
4596 ivs->upto++;
4597 ivs->bad_uses++;
4600 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4602 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4604 if (cheaper_cost_pair (cp, best_cp))
4605 best_cp = cp;
4608 iv_ca_set_cp (data, ivs, use, best_cp);
4611 /* Get cost for assignment IVS. */
4613 static unsigned
4614 iv_ca_cost (struct iv_ca *ivs)
4616 return (ivs->bad_uses ? INFTY : ivs->cost);
4619 /* Returns true if all dependences of CP are among invariants in IVS. */
4621 static bool
4622 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4624 unsigned i;
4625 bitmap_iterator bi;
4627 if (!cp->depends_on)
4628 return true;
4630 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4632 if (ivs->n_invariant_uses[i] == 0)
4633 return false;
4636 return true;
4639 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4640 it before NEXT_CHANGE. */
4642 static struct iv_ca_delta *
4643 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4644 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4646 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4648 change->use = use;
4649 change->old_cp = old_cp;
4650 change->new_cp = new_cp;
4651 change->next_change = next_change;
4653 return change;
4656 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4657 are rewritten. */
4659 static struct iv_ca_delta *
4660 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4662 struct iv_ca_delta *last;
4664 if (!l2)
4665 return l1;
4667 if (!l1)
4668 return l2;
4670 for (last = l1; last->next_change; last = last->next_change)
4671 continue;
4672 last->next_change = l2;
4674 return l1;
4677 /* Returns candidate by that USE is expressed in IVS. */
4679 static struct cost_pair *
4680 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4682 return ivs->cand_for_use[use->id];
4685 /* Reverse the list of changes DELTA, forming the inverse to it. */
4687 static struct iv_ca_delta *
4688 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4690 struct iv_ca_delta *act, *next, *prev = NULL;
4691 struct cost_pair *tmp;
4693 for (act = delta; act; act = next)
4695 next = act->next_change;
4696 act->next_change = prev;
4697 prev = act;
4699 tmp = act->old_cp;
4700 act->old_cp = act->new_cp;
4701 act->new_cp = tmp;
4704 return prev;
4707 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4708 reverted instead. */
4710 static void
4711 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4712 struct iv_ca_delta *delta, bool forward)
4714 struct cost_pair *from, *to;
4715 struct iv_ca_delta *act;
4717 if (!forward)
4718 delta = iv_ca_delta_reverse (delta);
4720 for (act = delta; act; act = act->next_change)
4722 from = act->old_cp;
4723 to = act->new_cp;
4724 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4725 iv_ca_set_cp (data, ivs, act->use, to);
4728 if (!forward)
4729 iv_ca_delta_reverse (delta);
4732 /* Returns true if CAND is used in IVS. */
4734 static bool
4735 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4737 return ivs->n_cand_uses[cand->id] > 0;
4740 /* Returns number of induction variable candidates in the set IVS. */
4742 static unsigned
4743 iv_ca_n_cands (struct iv_ca *ivs)
4745 return ivs->n_cands;
4748 /* Free the list of changes DELTA. */
4750 static void
4751 iv_ca_delta_free (struct iv_ca_delta **delta)
4753 struct iv_ca_delta *act, *next;
4755 for (act = *delta; act; act = next)
4757 next = act->next_change;
4758 free (act);
4761 *delta = NULL;
4764 /* Allocates new iv candidates assignment. */
4766 static struct iv_ca *
4767 iv_ca_new (struct ivopts_data *data)
4769 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4771 nw->upto = 0;
4772 nw->bad_uses = 0;
4773 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4774 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4775 nw->cands = BITMAP_ALLOC (NULL);
4776 nw->n_cands = 0;
4777 nw->n_regs = 0;
4778 nw->cand_use_cost = 0;
4779 nw->cand_cost = 0;
4780 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4781 nw->cost = 0;
4783 return nw;
4786 /* Free memory occupied by the set IVS. */
4788 static void
4789 iv_ca_free (struct iv_ca **ivs)
4791 free ((*ivs)->cand_for_use);
4792 free ((*ivs)->n_cand_uses);
4793 BITMAP_FREE ((*ivs)->cands);
4794 free ((*ivs)->n_invariant_uses);
4795 free (*ivs);
4796 *ivs = NULL;
4799 /* Dumps IVS to FILE. */
4801 static void
4802 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4804 const char *pref = " invariants ";
4805 unsigned i;
4807 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4808 bitmap_print (file, ivs->cands, " candidates ","\n");
4810 for (i = 1; i <= data->max_inv_id; i++)
4811 if (ivs->n_invariant_uses[i])
4813 fprintf (file, "%s%d", pref, i);
4814 pref = ", ";
4816 fprintf (file, "\n");
4819 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4820 new set, and store differences in DELTA. Number of induction variables
4821 in the new set is stored to N_IVS. */
4823 static unsigned
4824 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4825 struct iv_cand *cand, struct iv_ca_delta **delta,
4826 unsigned *n_ivs)
4828 unsigned i, cost;
4829 struct iv_use *use;
4830 struct cost_pair *old_cp, *new_cp;
4832 *delta = NULL;
4833 for (i = 0; i < ivs->upto; i++)
4835 use = iv_use (data, i);
4836 old_cp = iv_ca_cand_for_use (ivs, use);
4838 if (old_cp
4839 && old_cp->cand == cand)
4840 continue;
4842 new_cp = get_use_iv_cost (data, use, cand);
4843 if (!new_cp)
4844 continue;
4846 if (!iv_ca_has_deps (ivs, new_cp))
4847 continue;
4849 if (!cheaper_cost_pair (new_cp, old_cp))
4850 continue;
4852 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4855 iv_ca_delta_commit (data, ivs, *delta, true);
4856 cost = iv_ca_cost (ivs);
4857 if (n_ivs)
4858 *n_ivs = iv_ca_n_cands (ivs);
4859 iv_ca_delta_commit (data, ivs, *delta, false);
4861 return cost;
4864 /* Try narrowing set IVS by removing CAND. Return the cost of
4865 the new set and store the differences in DELTA. */
4867 static unsigned
4868 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4869 struct iv_cand *cand, struct iv_ca_delta **delta)
4871 unsigned i, ci;
4872 struct iv_use *use;
4873 struct cost_pair *old_cp, *new_cp, *cp;
4874 bitmap_iterator bi;
4875 struct iv_cand *cnd;
4876 unsigned cost;
4878 *delta = NULL;
4879 for (i = 0; i < n_iv_uses (data); i++)
4881 use = iv_use (data, i);
4883 old_cp = iv_ca_cand_for_use (ivs, use);
4884 if (old_cp->cand != cand)
4885 continue;
4887 new_cp = NULL;
4889 if (data->consider_all_candidates)
4891 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4893 if (ci == cand->id)
4894 continue;
4896 cnd = iv_cand (data, ci);
4898 cp = get_use_iv_cost (data, use, cnd);
4899 if (!cp)
4900 continue;
4901 if (!iv_ca_has_deps (ivs, cp))
4902 continue;
4904 if (!cheaper_cost_pair (cp, new_cp))
4905 continue;
4907 new_cp = cp;
4910 else
4912 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4914 if (ci == cand->id)
4915 continue;
4917 cnd = iv_cand (data, ci);
4919 cp = get_use_iv_cost (data, use, cnd);
4920 if (!cp)
4921 continue;
4922 if (!iv_ca_has_deps (ivs, cp))
4923 continue;
4925 if (!cheaper_cost_pair (cp, new_cp))
4926 continue;
4928 new_cp = cp;
4932 if (!new_cp)
4934 iv_ca_delta_free (delta);
4935 return INFTY;
4938 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4941 iv_ca_delta_commit (data, ivs, *delta, true);
4942 cost = iv_ca_cost (ivs);
4943 iv_ca_delta_commit (data, ivs, *delta, false);
4945 return cost;
4948 /* Try optimizing the set of candidates IVS by removing candidates different
4949 from to EXCEPT_CAND from it. Return cost of the new set, and store
4950 differences in DELTA. */
4952 static unsigned
4953 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4954 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4956 bitmap_iterator bi;
4957 struct iv_ca_delta *act_delta, *best_delta;
4958 unsigned i, best_cost, acost;
4959 struct iv_cand *cand;
4961 best_delta = NULL;
4962 best_cost = iv_ca_cost (ivs);
4964 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4966 cand = iv_cand (data, i);
4968 if (cand == except_cand)
4969 continue;
4971 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4973 if (acost < best_cost)
4975 best_cost = acost;
4976 iv_ca_delta_free (&best_delta);
4977 best_delta = act_delta;
4979 else
4980 iv_ca_delta_free (&act_delta);
4983 if (!best_delta)
4985 *delta = NULL;
4986 return best_cost;
4989 /* Recurse to possibly remove other unnecessary ivs. */
4990 iv_ca_delta_commit (data, ivs, best_delta, true);
4991 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4992 iv_ca_delta_commit (data, ivs, best_delta, false);
4993 *delta = iv_ca_delta_join (best_delta, *delta);
4994 return best_cost;
4997 /* Tries to extend the sets IVS in the best possible way in order
4998 to express the USE. */
5000 static bool
5001 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5002 struct iv_use *use)
5004 unsigned best_cost, act_cost;
5005 unsigned i;
5006 bitmap_iterator bi;
5007 struct iv_cand *cand;
5008 struct iv_ca_delta *best_delta = NULL, *act_delta;
5009 struct cost_pair *cp;
5011 iv_ca_add_use (data, ivs, use);
5012 best_cost = iv_ca_cost (ivs);
5014 cp = iv_ca_cand_for_use (ivs, use);
5015 if (cp)
5017 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5018 iv_ca_set_no_cp (data, ivs, use);
5021 /* First try important candidates. Only if it fails, try the specific ones.
5022 Rationale -- in loops with many variables the best choice often is to use
5023 just one generic biv. If we added here many ivs specific to the uses,
5024 the optimization algorithm later would be likely to get stuck in a local
5025 minimum, thus causing us to create too many ivs. The approach from
5026 few ivs to more seems more likely to be successful -- starting from few
5027 ivs, replacing an expensive use by a specific iv should always be a
5028 win. */
5029 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5031 cand = iv_cand (data, i);
5033 if (iv_ca_cand_used_p (ivs, cand))
5034 continue;
5036 cp = get_use_iv_cost (data, use, cand);
5037 if (!cp)
5038 continue;
5040 iv_ca_set_cp (data, ivs, use, cp);
5041 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5042 iv_ca_set_no_cp (data, ivs, use);
5043 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5045 if (act_cost < best_cost)
5047 best_cost = act_cost;
5049 iv_ca_delta_free (&best_delta);
5050 best_delta = act_delta;
5052 else
5053 iv_ca_delta_free (&act_delta);
5056 if (best_cost == INFTY)
5058 for (i = 0; i < use->n_map_members; i++)
5060 cp = use->cost_map + i;
5061 cand = cp->cand;
5062 if (!cand)
5063 continue;
5065 /* Already tried this. */
5066 if (cand->important)
5067 continue;
5069 if (iv_ca_cand_used_p (ivs, cand))
5070 continue;
5072 act_delta = NULL;
5073 iv_ca_set_cp (data, ivs, use, cp);
5074 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5075 iv_ca_set_no_cp (data, ivs, use);
5076 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5077 cp, act_delta);
5079 if (act_cost < best_cost)
5081 best_cost = act_cost;
5083 if (best_delta)
5084 iv_ca_delta_free (&best_delta);
5085 best_delta = act_delta;
5087 else
5088 iv_ca_delta_free (&act_delta);
5092 iv_ca_delta_commit (data, ivs, best_delta, true);
5093 iv_ca_delta_free (&best_delta);
5095 return (best_cost != INFTY);
5098 /* Finds an initial assignment of candidates to uses. */
5100 static struct iv_ca *
5101 get_initial_solution (struct ivopts_data *data)
5103 struct iv_ca *ivs = iv_ca_new (data);
5104 unsigned i;
5106 for (i = 0; i < n_iv_uses (data); i++)
5107 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5109 iv_ca_free (&ivs);
5110 return NULL;
5113 return ivs;
5116 /* Tries to improve set of induction variables IVS. */
5118 static bool
5119 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5121 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5122 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5123 struct iv_cand *cand;
5125 /* Try extending the set of induction variables by one. */
5126 for (i = 0; i < n_iv_cands (data); i++)
5128 cand = iv_cand (data, i);
5130 if (iv_ca_cand_used_p (ivs, cand))
5131 continue;
5133 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5134 if (!act_delta)
5135 continue;
5137 /* If we successfully added the candidate and the set is small enough,
5138 try optimizing it by removing other candidates. */
5139 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5141 iv_ca_delta_commit (data, ivs, act_delta, true);
5142 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5143 iv_ca_delta_commit (data, ivs, act_delta, false);
5144 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5147 if (acost < best_cost)
5149 best_cost = acost;
5150 iv_ca_delta_free (&best_delta);
5151 best_delta = act_delta;
5153 else
5154 iv_ca_delta_free (&act_delta);
5157 if (!best_delta)
5159 /* Try removing the candidates from the set instead. */
5160 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5162 /* Nothing more we can do. */
5163 if (!best_delta)
5164 return false;
5167 iv_ca_delta_commit (data, ivs, best_delta, true);
5168 gcc_assert (best_cost == iv_ca_cost (ivs));
5169 iv_ca_delta_free (&best_delta);
5170 return true;
5173 /* Attempts to find the optimal set of induction variables. We do simple
5174 greedy heuristic -- we try to replace at most one candidate in the selected
5175 solution and remove the unused ivs while this improves the cost. */
5177 static struct iv_ca *
5178 find_optimal_iv_set (struct ivopts_data *data)
5180 unsigned i;
5181 struct iv_ca *set;
5182 struct iv_use *use;
5184 /* Get the initial solution. */
5185 set = get_initial_solution (data);
5186 if (!set)
5188 if (dump_file && (dump_flags & TDF_DETAILS))
5189 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5190 return NULL;
5193 if (dump_file && (dump_flags & TDF_DETAILS))
5195 fprintf (dump_file, "Initial set of candidates:\n");
5196 iv_ca_dump (data, dump_file, set);
5199 while (try_improve_iv_set (data, set))
5201 if (dump_file && (dump_flags & TDF_DETAILS))
5203 fprintf (dump_file, "Improved to:\n");
5204 iv_ca_dump (data, dump_file, set);
5208 if (dump_file && (dump_flags & TDF_DETAILS))
5209 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5211 for (i = 0; i < n_iv_uses (data); i++)
5213 use = iv_use (data, i);
5214 use->selected = iv_ca_cand_for_use (set, use)->cand;
5217 return set;
5220 /* Creates a new induction variable corresponding to CAND. */
5222 static void
5223 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5225 block_stmt_iterator incr_pos;
5226 tree base;
5227 bool after = false;
5229 if (!cand->iv)
5230 return;
5232 switch (cand->pos)
5234 case IP_NORMAL:
5235 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5236 break;
5238 case IP_END:
5239 incr_pos = bsi_last (ip_end_pos (data->current_loop));
5240 after = true;
5241 break;
5243 case IP_ORIGINAL:
5244 /* Mark that the iv is preserved. */
5245 name_info (data, cand->var_before)->preserve_biv = true;
5246 name_info (data, cand->var_after)->preserve_biv = true;
5248 /* Rewrite the increment so that it uses var_before directly. */
5249 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5251 return;
5254 gimple_add_tmp_var (cand->var_before);
5255 add_referenced_tmp_var (cand->var_before);
5257 base = unshare_expr (cand->iv->base);
5259 create_iv (base, unshare_expr (cand->iv->step),
5260 cand->var_before, data->current_loop,
5261 &incr_pos, after, &cand->var_before, &cand->var_after);
5264 /* Creates new induction variables described in SET. */
5266 static void
5267 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5269 unsigned i;
5270 struct iv_cand *cand;
5271 bitmap_iterator bi;
5273 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5275 cand = iv_cand (data, i);
5276 create_new_iv (data, cand);
5280 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5281 is true, remove also the ssa name defined by the statement. */
5283 static void
5284 remove_statement (tree stmt, bool including_defined_name)
5286 if (TREE_CODE (stmt) == PHI_NODE)
5288 if (!including_defined_name)
5290 /* Prevent the ssa name defined by the statement from being removed. */
5291 SET_PHI_RESULT (stmt, NULL);
5293 remove_phi_node (stmt, NULL_TREE);
5295 else
5297 block_stmt_iterator bsi = bsi_for_stmt (stmt);
5299 bsi_remove (&bsi);
5303 /* Rewrites USE (definition of iv used in a nonlinear expression)
5304 using candidate CAND. */
5306 static void
5307 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5308 struct iv_use *use, struct iv_cand *cand)
5310 tree comp;
5311 tree op, stmts, tgt, ass;
5312 block_stmt_iterator bsi, pbsi;
5314 /* An important special case -- if we are asked to express value of
5315 the original iv by itself, just exit; there is no need to
5316 introduce a new computation (that might also need casting the
5317 variable to unsigned and back). */
5318 if (cand->pos == IP_ORIGINAL
5319 && TREE_CODE (use->stmt) == MODIFY_EXPR
5320 && TREE_OPERAND (use->stmt, 0) == cand->var_after)
5322 op = TREE_OPERAND (use->stmt, 1);
5324 /* Be a bit careful. In case variable is expressed in some
5325 complicated way, rewrite it so that we may get rid of this
5326 complicated expression. */
5327 if ((TREE_CODE (op) == PLUS_EXPR
5328 || TREE_CODE (op) == MINUS_EXPR)
5329 && TREE_OPERAND (op, 0) == cand->var_before
5330 && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
5331 return;
5334 comp = get_computation (data->current_loop, use, cand);
5335 switch (TREE_CODE (use->stmt))
5337 case PHI_NODE:
5338 tgt = PHI_RESULT (use->stmt);
5340 /* If we should keep the biv, do not replace it. */
5341 if (name_info (data, tgt)->preserve_biv)
5342 return;
5344 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5345 while (!bsi_end_p (pbsi)
5346 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5348 bsi = pbsi;
5349 bsi_next (&pbsi);
5351 break;
5353 case MODIFY_EXPR:
5354 tgt = TREE_OPERAND (use->stmt, 0);
5355 bsi = bsi_for_stmt (use->stmt);
5356 break;
5358 default:
5359 gcc_unreachable ();
5362 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5364 if (TREE_CODE (use->stmt) == PHI_NODE)
5366 if (stmts)
5367 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5368 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5369 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5370 remove_statement (use->stmt, false);
5371 SSA_NAME_DEF_STMT (tgt) = ass;
5373 else
5375 if (stmts)
5376 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5377 TREE_OPERAND (use->stmt, 1) = op;
5381 /* Replaces ssa name in index IDX by its basic variable. Callback for
5382 for_each_index. */
5384 static bool
5385 idx_remove_ssa_names (tree base, tree *idx,
5386 void *data ATTRIBUTE_UNUSED)
5388 tree *op;
5390 if (TREE_CODE (*idx) == SSA_NAME)
5391 *idx = SSA_NAME_VAR (*idx);
5393 if (TREE_CODE (base) == ARRAY_REF)
5395 op = &TREE_OPERAND (base, 2);
5396 if (*op
5397 && TREE_CODE (*op) == SSA_NAME)
5398 *op = SSA_NAME_VAR (*op);
5399 op = &TREE_OPERAND (base, 3);
5400 if (*op
5401 && TREE_CODE (*op) == SSA_NAME)
5402 *op = SSA_NAME_VAR (*op);
5405 return true;
5408 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5410 static tree
5411 unshare_and_remove_ssa_names (tree ref)
5413 ref = unshare_expr (ref);
5414 for_each_index (&ref, idx_remove_ssa_names, NULL);
5416 return ref;
5419 /* Extract the alias analysis info for the memory reference REF. There are
5420 several ways how this information may be stored and what precisely is
5421 its semantics depending on the type of the reference, but there always is
5422 somewhere hidden one _DECL node that is used to determine the set of
5423 virtual operands for the reference. The code below deciphers this jungle
5424 and extracts this single useful piece of information. */
5426 static tree
5427 get_ref_tag (tree ref)
5429 tree var = get_base_address (ref);
5430 tree tag;
5432 if (!var)
5433 return NULL_TREE;
5435 if (TREE_CODE (var) == INDIRECT_REF)
5436 var = TREE_OPERAND (var, 0);
5437 if (TREE_CODE (var) == SSA_NAME)
5439 if (SSA_NAME_PTR_INFO (var))
5441 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5442 if (tag)
5443 return tag;
5446 var = SSA_NAME_VAR (var);
5449 if (DECL_P (var))
5451 tag = var_ann (var)->type_mem_tag;
5452 if (tag)
5453 return tag;
5455 return var;
5458 return NULL_TREE;
5461 /* Copies the reference information from OLD_REF to NEW_REF. */
5463 static void
5464 copy_ref_info (tree new_ref, tree old_ref)
5466 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5467 copy_mem_ref_info (new_ref, old_ref);
5468 else
5470 TMR_TAG (new_ref) = get_ref_tag (old_ref);
5471 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5475 /* Rewrites USE (address that is an iv) using candidate CAND. */
5477 static void
5478 rewrite_use_address (struct ivopts_data *data,
5479 struct iv_use *use, struct iv_cand *cand)
5481 struct affine_tree_combination aff;
5482 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5483 tree ref;
5485 get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5486 unshare_aff_combination (&aff);
5488 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5489 copy_ref_info (ref, *use->op_p);
5490 *use->op_p = ref;
5493 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5494 candidate CAND. */
5496 static void
5497 rewrite_use_compare (struct ivopts_data *data,
5498 struct iv_use *use, struct iv_cand *cand)
5500 tree comp;
5501 tree *op_p, cond, op, stmts, bound;
5502 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5503 enum tree_code compare;
5504 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5506 bound = cp->value;
5507 if (bound)
5509 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5510 tree var_type = TREE_TYPE (var);
5512 compare = iv_elimination_compare (data, use);
5513 bound = fold_convert (var_type, bound);
5514 op = force_gimple_operand (unshare_expr (bound), &stmts,
5515 true, NULL_TREE);
5517 if (stmts)
5518 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5520 *use->op_p = build2 (compare, boolean_type_node, var, op);
5521 update_stmt (use->stmt);
5522 return;
5525 /* The induction variable elimination failed; just express the original
5526 giv. */
5527 comp = get_computation (data->current_loop, use, cand);
5529 cond = *use->op_p;
5530 op_p = &TREE_OPERAND (cond, 0);
5531 if (TREE_CODE (*op_p) != SSA_NAME
5532 || zero_p (get_iv (data, *op_p)->step))
5533 op_p = &TREE_OPERAND (cond, 1);
5535 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5536 if (stmts)
5537 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5539 *op_p = op;
5542 /* Ensure that operand *OP_P may be used at the end of EXIT without
5543 violating loop closed ssa form. */
5545 static void
5546 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5548 basic_block def_bb;
5549 struct loop *def_loop;
5550 tree phi, use;
5552 use = USE_FROM_PTR (op_p);
5553 if (TREE_CODE (use) != SSA_NAME)
5554 return;
5556 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5557 if (!def_bb)
5558 return;
5560 def_loop = def_bb->loop_father;
5561 if (flow_bb_inside_loop_p (def_loop, exit->dest))
5562 return;
5564 /* Try finding a phi node that copies the value out of the loop. */
5565 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5566 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5567 break;
5569 if (!phi)
5571 /* Create such a phi node. */
5572 tree new_name = duplicate_ssa_name (use, NULL);
5574 phi = create_phi_node (new_name, exit->dest);
5575 SSA_NAME_DEF_STMT (new_name) = phi;
5576 add_phi_arg (phi, use, exit);
5579 SET_USE (op_p, PHI_RESULT (phi));
5582 /* Ensure that operands of STMT may be used at the end of EXIT without
5583 violating loop closed ssa form. */
5585 static void
5586 protect_loop_closed_ssa_form (edge exit, tree stmt)
5588 ssa_op_iter iter;
5589 use_operand_p use_p;
5591 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5592 protect_loop_closed_ssa_form_use (exit, use_p);
5595 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
5596 so that they are emitted on the correct place, and so that the loop closed
5597 ssa form is preserved. */
5599 static void
5600 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5602 tree_stmt_iterator tsi;
5603 block_stmt_iterator bsi;
5604 tree phi, stmt, def, next;
5606 if (!single_pred_p (exit->dest))
5607 split_loop_exit_edge (exit);
5609 /* Ensure there is label in exit->dest, so that we can
5610 insert after it. */
5611 tree_block_label (exit->dest);
5612 bsi = bsi_after_labels (exit->dest);
5614 if (TREE_CODE (stmts) == STATEMENT_LIST)
5616 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5618 bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
5619 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5622 else
5624 bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
5625 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5628 if (!op)
5629 return;
5631 for (phi = phi_nodes (exit->dest); phi; phi = next)
5633 next = PHI_CHAIN (phi);
5635 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5637 def = PHI_RESULT (phi);
5638 remove_statement (phi, false);
5639 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5640 def, op);
5641 SSA_NAME_DEF_STMT (def) = stmt;
5642 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5647 /* Rewrites the final value of USE (that is only needed outside of the loop)
5648 using candidate CAND. */
5650 static void
5651 rewrite_use_outer (struct ivopts_data *data,
5652 struct iv_use *use, struct iv_cand *cand)
5654 edge exit;
5655 tree value, op, stmts, tgt;
5656 tree phi;
5658 switch (TREE_CODE (use->stmt))
5660 case PHI_NODE:
5661 tgt = PHI_RESULT (use->stmt);
5662 break;
5663 case MODIFY_EXPR:
5664 tgt = TREE_OPERAND (use->stmt, 0);
5665 break;
5666 default:
5667 gcc_unreachable ();
5670 exit = single_dom_exit (data->current_loop);
5672 if (exit)
5674 if (!cand->iv)
5676 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5677 value = cp->value;
5679 else
5680 value = get_computation_at (data->current_loop,
5681 use, cand, last_stmt (exit->src));
5683 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5685 /* If we will preserve the iv anyway and we would need to perform
5686 some computation to replace the final value, do nothing. */
5687 if (stmts && name_info (data, tgt)->preserve_biv)
5688 return;
5690 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5692 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5694 if (USE_FROM_PTR (use_p) == tgt)
5695 SET_USE (use_p, op);
5698 if (stmts)
5699 compute_phi_arg_on_exit (exit, stmts, op);
5701 /* Enable removal of the statement. We cannot remove it directly,
5702 since we may still need the aliasing information attached to the
5703 ssa name defined by it. */
5704 name_info (data, tgt)->iv->have_use_for = false;
5705 return;
5708 /* If the variable is going to be preserved anyway, there is nothing to
5709 do. */
5710 if (name_info (data, tgt)->preserve_biv)
5711 return;
5713 /* Otherwise we just need to compute the iv. */
5714 rewrite_use_nonlinear_expr (data, use, cand);
5717 /* Rewrites USE using candidate CAND. */
5719 static void
5720 rewrite_use (struct ivopts_data *data,
5721 struct iv_use *use, struct iv_cand *cand)
5723 switch (use->type)
5725 case USE_NONLINEAR_EXPR:
5726 rewrite_use_nonlinear_expr (data, use, cand);
5727 break;
5729 case USE_OUTER:
5730 rewrite_use_outer (data, use, cand);
5731 break;
5733 case USE_ADDRESS:
5734 rewrite_use_address (data, use, cand);
5735 break;
5737 case USE_COMPARE:
5738 rewrite_use_compare (data, use, cand);
5739 break;
5741 default:
5742 gcc_unreachable ();
5744 update_stmt (use->stmt);
5747 /* Rewrite the uses using the selected induction variables. */
5749 static void
5750 rewrite_uses (struct ivopts_data *data)
5752 unsigned i;
5753 struct iv_cand *cand;
5754 struct iv_use *use;
5756 for (i = 0; i < n_iv_uses (data); i++)
5758 use = iv_use (data, i);
5759 cand = use->selected;
5760 gcc_assert (cand);
5762 rewrite_use (data, use, cand);
5766 /* Removes the ivs that are not used after rewriting. */
5768 static void
5769 remove_unused_ivs (struct ivopts_data *data)
5771 unsigned j;
5772 bitmap_iterator bi;
5774 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5776 struct version_info *info;
5778 info = ver_info (data, j);
5779 if (info->iv
5780 && !zero_p (info->iv->step)
5781 && !info->inv_id
5782 && !info->iv->have_use_for
5783 && !info->preserve_biv)
5784 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5788 /* Frees data allocated by the optimization of a single loop. */
5790 static void
5791 free_loop_data (struct ivopts_data *data)
5793 unsigned i, j;
5794 bitmap_iterator bi;
5795 tree obj;
5797 htab_empty (data->niters);
5799 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5801 struct version_info *info;
5803 info = ver_info (data, i);
5804 if (info->iv)
5805 free (info->iv);
5806 info->iv = NULL;
5807 info->has_nonlin_use = false;
5808 info->preserve_biv = false;
5809 info->inv_id = 0;
5811 bitmap_clear (data->relevant);
5812 bitmap_clear (data->important_candidates);
5814 for (i = 0; i < n_iv_uses (data); i++)
5816 struct iv_use *use = iv_use (data, i);
5818 free (use->iv);
5819 BITMAP_FREE (use->related_cands);
5820 for (j = 0; j < use->n_map_members; j++)
5821 if (use->cost_map[j].depends_on)
5822 BITMAP_FREE (use->cost_map[j].depends_on);
5823 free (use->cost_map);
5824 free (use);
5826 VEC_truncate (iv_use_p, data->iv_uses, 0);
5828 for (i = 0; i < n_iv_cands (data); i++)
5830 struct iv_cand *cand = iv_cand (data, i);
5832 if (cand->iv)
5833 free (cand->iv);
5834 if (cand->depends_on)
5835 BITMAP_FREE (cand->depends_on);
5836 free (cand);
5838 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5840 if (data->version_info_size < num_ssa_names)
5842 data->version_info_size = 2 * num_ssa_names;
5843 free (data->version_info);
5844 data->version_info = xcalloc (data->version_info_size,
5845 sizeof (struct version_info));
5848 data->max_inv_id = 0;
5850 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5851 SET_DECL_RTL (obj, NULL_RTX);
5853 VEC_truncate (tree, decl_rtl_to_reset, 0);
5856 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5857 loop tree. */
5859 static void
5860 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5862 unsigned i;
5864 for (i = 1; i < loops->num; i++)
5865 if (loops->parray[i])
5867 free (loops->parray[i]->aux);
5868 loops->parray[i]->aux = NULL;
5871 free_loop_data (data);
5872 free (data->version_info);
5873 BITMAP_FREE (data->relevant);
5874 BITMAP_FREE (data->important_candidates);
5875 htab_delete (data->niters);
5877 VEC_free (tree, heap, decl_rtl_to_reset);
5878 VEC_free (iv_use_p, heap, data->iv_uses);
5879 VEC_free (iv_cand_p, heap, data->iv_candidates);
5882 /* Optimizes the LOOP. Returns true if anything changed. */
5884 static bool
5885 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5887 bool changed = false;
5888 struct iv_ca *iv_ca;
5889 edge exit;
5891 data->current_loop = loop;
5893 if (dump_file && (dump_flags & TDF_DETAILS))
5895 fprintf (dump_file, "Processing loop %d\n", loop->num);
5897 exit = single_dom_exit (loop);
5898 if (exit)
5900 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5901 exit->src->index, exit->dest->index);
5902 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5903 fprintf (dump_file, "\n");
5906 fprintf (dump_file, "\n");
5909 /* For each ssa name determines whether it behaves as an induction variable
5910 in some loop. */
5911 if (!find_induction_variables (data))
5912 goto finish;
5914 /* Finds interesting uses (item 1). */
5915 find_interesting_uses (data);
5916 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5917 goto finish;
5919 /* Finds candidates for the induction variables (item 2). */
5920 find_iv_candidates (data);
5922 /* Calculates the costs (item 3, part 1). */
5923 determine_use_iv_costs (data);
5924 determine_iv_costs (data);
5925 determine_set_costs (data);
5927 /* Find the optimal set of induction variables (item 3, part 2). */
5928 iv_ca = find_optimal_iv_set (data);
5929 if (!iv_ca)
5930 goto finish;
5931 changed = true;
5933 /* Create the new induction variables (item 4, part 1). */
5934 create_new_ivs (data, iv_ca);
5935 iv_ca_free (&iv_ca);
5937 /* Rewrite the uses (item 4, part 2). */
5938 rewrite_uses (data);
5940 /* Remove the ivs that are unused after rewriting. */
5941 remove_unused_ivs (data);
5943 /* We have changed the structure of induction variables; it might happen
5944 that definitions in the scev database refer to some of them that were
5945 eliminated. */
5946 scev_reset ();
5948 finish:
5949 free_loop_data (data);
5951 return changed;
5954 /* Main entry point. Optimizes induction variables in LOOPS. */
5956 void
5957 tree_ssa_iv_optimize (struct loops *loops)
5959 struct loop *loop;
5960 struct ivopts_data data;
5962 tree_ssa_iv_optimize_init (loops, &data);
5964 /* Optimize the loops starting with the innermost ones. */
5965 loop = loops->tree_root;
5966 while (loop->inner)
5967 loop = loop->inner;
5969 /* Scan the loops, inner ones first. */
5970 while (loop != loops->tree_root)
5972 if (dump_file && (dump_flags & TDF_DETAILS))
5973 flow_loop_dump (loop, dump_file, NULL, 1);
5975 tree_ssa_iv_optimize_loop (&data, loop);
5977 if (loop->next)
5979 loop = loop->next;
5980 while (loop->inner)
5981 loop = loop->inner;
5983 else
5984 loop = loop->outer;
5987 tree_ssa_iv_optimize_finalize (loops, &data);