gcc/tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does so in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r.t. a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variable's value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible; it might give better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
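/* As an illustrative sketch (hypothetical code; a, b, c, n, i, p and q are
   made up for the example), a loop like

     for (i = 0; i < n; i++)
       a[i] = b[i] + c;

   may, after strength reduction and induction variable elimination, behave
   as if it had been written

     for (p = &a[0], q = &b[0]; p != &a[n]; p++, q++)
       *p = *q + c;

   i.e. the counter used only for indexing and for the exit test is replaced
   by pointer induction variables, and the multiplications hidden in the
   array indexing disappear.  */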
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
92 /* The infinite cost. */
93 #define INFTY 10000000
95 /* The expected number of loop iterations. TODO -- use profiling instead of
96 this. */
97 #define AVG_LOOP_NITER(LOOP) 5
99 /* Just to shorten the ugly names. */
100 #define EXEC_BINARY nondestructive_fold_binary_to_constant
101 #define EXEC_UNARY nondestructive_fold_unary_to_constant
103 /* Representation of the induction variable. */
104 struct iv
106 tree base; /* Initial value of the iv. */
107 tree base_object; /* A memory object to which the induction variable points. */
108 tree step; /* Step of the iv (constant only). */
109 tree ssa_name; /* The ssa name with the value. */
110 bool biv_p; /* Is it a biv? */
111 bool have_use_for; /* Do we already have a use for it? */
112 unsigned use_id; /* The id of the use recorded for it, if have_use_for is true. */
115 /* Per-ssa version information (induction variable descriptions, etc.). */
116 struct version_info
118 tree name; /* The ssa name. */
119 struct iv *iv; /* Induction variable description. */
120 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
121 an expression that is not an induction variable. */
122 unsigned inv_id; /* Id of an invariant. */
123 bool preserve_biv; /* For the original biv, whether to preserve it. */
126 /* Information attached to loop. */
127 struct loop_data
129 struct tree_niter_desc niter;
130 /* Number of iterations. */
132 unsigned regs_used; /* Number of registers used. */
135 /* Types of uses. */
136 enum use_type
138 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
139 USE_OUTER, /* The induction variable is used outside the loop. */
140 USE_ADDRESS, /* Use in an address. */
141 USE_COMPARE /* Use is a compare. */
144 /* The candidate - cost pair. */
145 struct cost_pair
147 struct iv_cand *cand; /* The candidate. */
148 unsigned cost; /* The cost. */
149 bitmap depends_on; /* The list of invariants that have to be
150 preserved. */
153 /* Use. */
154 struct iv_use
156 unsigned id; /* The id of the use. */
157 enum use_type type; /* Type of the use. */
158 struct iv *iv; /* The induction variable it is based on. */
159 tree stmt; /* Statement in which it occurs. */
160 tree *op_p; /* The place where it occurs. */
161 bitmap related_cands; /* The set of "related" iv candidates. */
163 unsigned n_map_members; /* Number of candidates in the cost_map list. */
164 struct cost_pair *cost_map;
165 /* The costs w.r.t. the iv candidates. */
167 struct iv_cand *selected;
168 /* The selected candidate. */
171 /* The position where the iv is computed. */
172 enum iv_position
174 IP_NORMAL, /* At the end, just before the exit condition. */
175 IP_END, /* At the end of the latch block. */
176 IP_ORIGINAL /* The original biv. */
179 /* The induction variable candidate. */
180 struct iv_cand
182 unsigned id; /* The number of the candidate. */
183 bool important; /* Whether this is an "important" candidate, i.e. such
184 that it should be considered by all uses. */
185 enum iv_position pos; /* Where it is computed. */
186 tree incremented_at; /* For original biv, the statement where it is
187 incremented. */
188 tree var_before; /* The variable used for it before increment. */
189 tree var_after; /* The variable used for it after increment. */
190 struct iv *iv; /* The value of the candidate. NULL for
191 "pseudocandidate" used to indicate the possibility
192 to replace the final value of an iv by direct
193 computation of the value. */
194 unsigned cost; /* Cost of the candidate. */
197 /* The data used by the induction variable optimizations. */
199 struct ivopts_data
201 /* The currently optimized loop. */
202 struct loop *current_loop;
204 /* The size of version_info array allocated. */
205 unsigned version_info_size;
207 /* The array of information for the ssa names. */
208 struct version_info *version_info;
210 /* The bitmap of indices in version_info whose value was changed. */
211 bitmap relevant;
213 /* The maximum invariant id. */
214 unsigned max_inv_id;
216 /* The uses of induction variables. */
217 varray_type iv_uses;
219 /* The candidates. */
220 varray_type iv_candidates;
222 /* A bitmap of important candidates. */
223 bitmap important_candidates;
225 /* Whether to consider just related and important candidates when replacing a
226 use. */
227 bool consider_all_candidates;
230 /* Bound on number of candidates below which all candidates are considered. */
232 #define CONSIDER_ALL_CANDIDATES_BOUND \
233 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
235 /* If there are more iv occurrences, we just give up (it is quite unlikely that
236 optimizing such a loop would help, and it would take ages). */
238 #define MAX_CONSIDERED_USES \
239 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
241 /* The list of trees for which the decl_rtl field must be reset is stored
242 here. */
244 static varray_type decl_rtl_to_reset;
246 /* Number of uses recorded in DATA. */
248 static inline unsigned
249 n_iv_uses (struct ivopts_data *data)
251 return VARRAY_ACTIVE_SIZE (data->iv_uses);
254 /* Ith use recorded in DATA. */
256 static inline struct iv_use *
257 iv_use (struct ivopts_data *data, unsigned i)
259 return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
262 /* Number of candidates recorded in DATA. */
264 static inline unsigned
265 n_iv_cands (struct ivopts_data *data)
267 return VARRAY_ACTIVE_SIZE (data->iv_candidates);
270 /* Ith candidate recorded in DATA. */
272 static inline struct iv_cand *
273 iv_cand (struct ivopts_data *data, unsigned i)
275 return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
278 /* The data for LOOP. */
280 static inline struct loop_data *
281 loop_data (struct loop *loop)
283 return loop->aux;
286 /* The single loop exit if it dominates the latch, NULL otherwise. */
288 static edge
289 single_dom_exit (struct loop *loop)
291 edge exit = loop->single_exit;
293 if (!exit)
294 return NULL;
296 if (!just_once_each_iteration_p (loop, exit->src))
297 return NULL;
299 return exit;
302 /* Dumps information about the induction variable IV to FILE. */
304 extern void dump_iv (FILE *, struct iv *);
305 void
306 dump_iv (FILE *file, struct iv *iv)
308 if (iv->ssa_name)
310 fprintf (file, "ssa name ");
311 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
312 fprintf (file, "\n");
315 fprintf (file, " type ");
316 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
317 fprintf (file, "\n");
319 if (iv->step)
321 fprintf (file, " base ");
322 print_generic_expr (file, iv->base, TDF_SLIM);
323 fprintf (file, "\n");
325 fprintf (file, " step ");
326 print_generic_expr (file, iv->step, TDF_SLIM);
327 fprintf (file, "\n");
329 else
331 fprintf (file, " invariant ");
332 print_generic_expr (file, iv->base, TDF_SLIM);
333 fprintf (file, "\n");
336 if (iv->base_object)
338 fprintf (file, " base object ");
339 print_generic_expr (file, iv->base_object, TDF_SLIM);
340 fprintf (file, "\n");
343 if (iv->biv_p)
344 fprintf (file, " is a biv\n");
347 /* Dumps information about the USE to FILE. */
349 extern void dump_use (FILE *, struct iv_use *);
350 void
351 dump_use (FILE *file, struct iv_use *use)
353 fprintf (file, "use %d\n", use->id);
355 switch (use->type)
357 case USE_NONLINEAR_EXPR:
358 fprintf (file, " generic\n");
359 break;
361 case USE_OUTER:
362 fprintf (file, " outside\n");
363 break;
365 case USE_ADDRESS:
366 fprintf (file, " address\n");
367 break;
369 case USE_COMPARE:
370 fprintf (file, " compare\n");
371 break;
373 default:
374 gcc_unreachable ();
377 fprintf (file, " in statement ");
378 print_generic_expr (file, use->stmt, TDF_SLIM);
379 fprintf (file, "\n");
381 fprintf (file, " at position ");
382 if (use->op_p)
383 print_generic_expr (file, *use->op_p, TDF_SLIM);
384 fprintf (file, "\n");
386 dump_iv (file, use->iv);
388 fprintf (file, " related candidates ");
389 dump_bitmap (file, use->related_cands);
392 /* Dumps information about the uses to FILE. */
394 extern void dump_uses (FILE *, struct ivopts_data *);
395 void
396 dump_uses (FILE *file, struct ivopts_data *data)
398 unsigned i;
399 struct iv_use *use;
401 for (i = 0; i < n_iv_uses (data); i++)
403 use = iv_use (data, i);
405 dump_use (file, use);
406 fprintf (file, "\n");
410 /* Dumps information about induction variable candidate CAND to FILE. */
412 extern void dump_cand (FILE *, struct iv_cand *);
413 void
414 dump_cand (FILE *file, struct iv_cand *cand)
416 struct iv *iv = cand->iv;
418 fprintf (file, "candidate %d%s\n",
419 cand->id, cand->important ? " (important)" : "");
421 if (!iv)
423 fprintf (file, " final value replacement\n");
424 return;
427 switch (cand->pos)
429 case IP_NORMAL:
430 fprintf (file, " incremented before exit test\n");
431 break;
433 case IP_END:
434 fprintf (file, " incremented at end\n");
435 break;
437 case IP_ORIGINAL:
438 fprintf (file, " original biv\n");
439 break;
442 dump_iv (file, iv);
445 /* Returns the info for ssa version VER. */
447 static inline struct version_info *
448 ver_info (struct ivopts_data *data, unsigned ver)
450 return data->version_info + ver;
453 /* Returns the info for ssa name NAME. */
455 static inline struct version_info *
456 name_info (struct ivopts_data *data, tree name)
458 return ver_info (data, SSA_NAME_VERSION (name));
461 /* Checks whether there exists a number X such that X * B = A, counting modulo
462 2^BITS. */
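/* A worked example (with made-up numbers): for BITS = 4, A = 6 and B = 3,
   the inverse of B modulo 2^4 is 3^(2^3 - 1) = 3^7 = 2187 = 11 (mod 16),
   since 3 * 11 = 33 = 1 (mod 16).  Hence X = 6 * 11 = 66 = 2 (mod 16),
   and indeed 2 * 3 = 6 (mod 16).  If, after the common power of two has
   been divided out of both A and B, B is still even while A is odd, no
   such X exists and false is returned.  */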
464 static bool
465 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
466 HOST_WIDE_INT *x)
468 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
469 unsigned HOST_WIDE_INT inv, ex, val;
470 unsigned i;
472 a &= mask;
473 b &= mask;
475 /* First divide the whole equation by 2 as long as possible. */
476 while (!(a & 1) && !(b & 1))
478 a >>= 1;
479 b >>= 1;
480 bits--;
481 mask >>= 1;
484 if (!(b & 1))
486 /* If b is still even, a is odd and there is no such x. */
487 return false;
490 /* Find the inverse of b. We compute it as
491 b^(2^(bits - 1) - 1) (mod 2^bits). */
492 inv = 1;
493 ex = b;
494 for (i = 0; i < bits - 1; i++)
496 inv = (inv * ex) & mask;
497 ex = (ex * ex) & mask;
500 val = (a * inv) & mask;
502 gcc_assert (((val * b) & mask) == a);
504 if ((val >> (bits - 1)) & 1)
505 val |= ~mask;
507 *x = val;
509 return true;
512 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
513 emitted in LOOP. */
515 static bool
516 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
518 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
520 gcc_assert (bb);
522 if (sbb == loop->latch)
523 return true;
525 if (sbb != bb)
526 return false;
528 return stmt == last_stmt (bb);
531 /* Returns true if STMT is after the place where the original induction
532 variable CAND is incremented. */
534 static bool
535 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
537 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
538 basic_block stmt_bb = bb_for_stmt (stmt);
539 block_stmt_iterator bsi;
541 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
542 return false;
544 if (stmt_bb != cand_bb)
545 return true;
547 /* Scan the block from the end, since the original ivs are usually
548 incremented at the end of the loop body. */
549 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
551 if (bsi_stmt (bsi) == cand->incremented_at)
552 return false;
553 if (bsi_stmt (bsi) == stmt)
554 return true;
558 /* Returns true if STMT is after the place where the induction variable
559 CAND is incremented in LOOP. */
561 static bool
562 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
564 switch (cand->pos)
566 case IP_END:
567 return false;
569 case IP_NORMAL:
570 return stmt_after_ip_normal_pos (loop, stmt);
572 case IP_ORIGINAL:
573 return stmt_after_ip_original_pos (cand, stmt);
575 default:
576 gcc_unreachable ();
580 /* Initializes data structures used by the iv optimization pass, stored
581 in DATA. LOOPS is the loop tree. */
583 static void
584 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
586 unsigned i;
588 data->version_info_size = 2 * num_ssa_names;
589 data->version_info = xcalloc (data->version_info_size,
590 sizeof (struct version_info));
591 data->relevant = BITMAP_XMALLOC ();
592 data->max_inv_id = 0;
594 for (i = 1; i < loops->num; i++)
595 if (loops->parray[i])
596 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
598 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
599 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
600 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
603 /* Returns a memory object to which EXPR points. In case we are able to
604 determine that it does not point to any such object, NULL is returned. */
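/* For example (purely illustrative): for EXPR == &a[i], where a is a declared
   array, the base object is &a; for EXPR == p + 4 * i, where p is a pointer
   ssa name, the base object is p; for an integer constant of pointer type
   NULL is returned.  */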
606 static tree
607 determine_base_object (tree expr)
609 enum tree_code code = TREE_CODE (expr);
610 tree base, obj, op0, op1;
612 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
613 return NULL_TREE;
615 switch (code)
617 case INTEGER_CST:
618 return NULL_TREE;
620 case ADDR_EXPR:
621 obj = TREE_OPERAND (expr, 0);
622 base = get_base_address (obj);
624 if (!base)
625 return fold_convert (ptr_type_node, expr);
627 return fold (build1 (ADDR_EXPR, ptr_type_node, base));
629 case PLUS_EXPR:
630 case MINUS_EXPR:
631 op0 = determine_base_object (TREE_OPERAND (expr, 0));
632 op1 = determine_base_object (TREE_OPERAND (expr, 1));
634 if (!op1)
635 return op0;
637 if (!op0)
638 return (code == PLUS_EXPR
639 ? op1
640 : fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));
642 return fold (build (code, ptr_type_node, op0, op1));
644 default:
645 return fold_convert (ptr_type_node, expr);
649 /* Allocates an induction variable with the given initial value BASE and
650 step STEP. */
652 static struct iv *
653 alloc_iv (tree base, tree step)
655 struct iv *iv = xcalloc (1, sizeof (struct iv));
657 if (step && integer_zerop (step))
658 step = NULL_TREE;
660 iv->base = base;
661 iv->base_object = determine_base_object (base);
662 iv->step = step;
663 iv->biv_p = false;
664 iv->have_use_for = false;
665 iv->use_id = 0;
666 iv->ssa_name = NULL_TREE;
668 return iv;
671 /* Sets STEP and BASE for induction variable IV. */
673 static void
674 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
676 struct version_info *info = name_info (data, iv);
678 gcc_assert (!info->iv);
680 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
681 info->iv = alloc_iv (base, step);
682 info->iv->ssa_name = iv;
685 /* Finds induction variable declaration for VAR. */
687 static struct iv *
688 get_iv (struct ivopts_data *data, tree var)
690 basic_block bb;
692 if (!name_info (data, var)->iv)
694 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
696 if (!bb
697 || !flow_bb_inside_loop_p (data->current_loop, bb))
698 set_iv (data, var, var, NULL_TREE);
701 return name_info (data, var)->iv;
704 /* Determines the step of a biv defined in PHI. */
706 static tree
707 determine_biv_step (tree phi)
709 struct loop *loop = bb_for_stmt (phi)->loop_father;
710 tree name = PHI_RESULT (phi), base, step;
711 tree type = TREE_TYPE (name);
713 if (!is_gimple_reg (name))
714 return NULL_TREE;
716 if (!simple_iv (loop, phi, name, &base, &step))
717 return NULL_TREE;
719 if (!step)
720 return build_int_cst (type, 0);
722 return step;
725 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
727 static bool
728 abnormal_ssa_name_p (tree exp)
730 if (!exp)
731 return false;
733 if (TREE_CODE (exp) != SSA_NAME)
734 return false;
736 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
739 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
740 abnormal phi node. Callback for for_each_index. */
742 static bool
743 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
744 void *data ATTRIBUTE_UNUSED)
746 if (TREE_CODE (base) == ARRAY_REF)
748 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
749 return false;
750 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
751 return false;
754 return !abnormal_ssa_name_p (*index);
757 /* Returns true if EXPR contains a ssa name that occurs in an
758 abnormal phi node. */
760 static bool
761 contains_abnormal_ssa_name_p (tree expr)
763 enum tree_code code = TREE_CODE (expr);
764 enum tree_code_class class = TREE_CODE_CLASS (code);
766 if (code == SSA_NAME)
767 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
769 if (code == INTEGER_CST
770 || is_gimple_min_invariant (expr))
771 return false;
773 if (code == ADDR_EXPR)
774 return !for_each_index (&TREE_OPERAND (expr, 0),
775 idx_contains_abnormal_ssa_name_p,
776 NULL);
778 switch (class)
780 case tcc_binary:
781 case tcc_comparison:
782 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
783 return true;
785 /* Fallthru. */
786 case tcc_unary:
787 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
788 return true;
790 break;
792 default:
793 gcc_unreachable ();
796 return false;
799 /* Finds basic ivs. */
801 static bool
802 find_bivs (struct ivopts_data *data)
804 tree phi, step, type, base;
805 bool found = false;
806 struct loop *loop = data->current_loop;
808 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
810 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
811 continue;
813 step = determine_biv_step (phi);
815 if (!step)
816 continue;
817 if (cst_and_fits_in_hwi (step)
818 && int_cst_value (step) == 0)
819 continue;
821 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
822 if (contains_abnormal_ssa_name_p (base))
823 continue;
825 type = TREE_TYPE (PHI_RESULT (phi));
826 base = fold_convert (type, base);
827 step = fold_convert (type, step);
829 /* FIXME: We do not handle induction variables whose step does
830 not satisfy cst_and_fits_in_hwi. */
831 if (!cst_and_fits_in_hwi (step))
832 continue;
834 set_iv (data, PHI_RESULT (phi), base, step);
835 found = true;
838 return found;
841 /* Marks basic ivs. */
843 static void
844 mark_bivs (struct ivopts_data *data)
846 tree phi, var;
847 struct iv *iv, *incr_iv;
848 struct loop *loop = data->current_loop;
849 basic_block incr_bb;
851 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
853 iv = get_iv (data, PHI_RESULT (phi));
854 if (!iv)
855 continue;
857 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
858 incr_iv = get_iv (data, var);
859 if (!incr_iv)
860 continue;
862 /* If the increment is in the subloop, ignore it. */
863 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
864 if (incr_bb->loop_father != data->current_loop
865 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
866 continue;
868 iv->biv_p = true;
869 incr_iv->biv_p = true;
873 /* Checks whether STMT defines a linear induction variable and stores its
874 parameters to BASE and STEP. */
876 static bool
877 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
878 tree *base, tree *step)
880 tree lhs;
881 struct loop *loop = data->current_loop;
883 *base = NULL_TREE;
884 *step = NULL_TREE;
886 if (TREE_CODE (stmt) != MODIFY_EXPR)
887 return false;
889 lhs = TREE_OPERAND (stmt, 0);
890 if (TREE_CODE (lhs) != SSA_NAME)
891 return false;
893 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
894 return false;
896 /* FIXME: We do not handle induction variables whose step does
897 not satisfy cst_and_fits_in_hwi. */
898 if (!zero_p (*step)
899 && !cst_and_fits_in_hwi (*step))
900 return false;
902 if (contains_abnormal_ssa_name_p (*base))
903 return false;
905 return true;
908 /* Finds general ivs in statement STMT. */
910 static void
911 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
913 tree base, step;
915 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
916 return;
918 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
921 /* Finds general ivs in basic block BB. */
923 static void
924 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
926 block_stmt_iterator bsi;
928 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
929 find_givs_in_stmt (data, bsi_stmt (bsi));
932 /* Finds general ivs. */
934 static void
935 find_givs (struct ivopts_data *data)
937 struct loop *loop = data->current_loop;
938 basic_block *body = get_loop_body_in_dom_order (loop);
939 unsigned i;
941 for (i = 0; i < loop->num_nodes; i++)
942 find_givs_in_bb (data, body[i]);
943 free (body);
946 /* Determine the number of iterations of the current loop. */
948 static void
949 determine_number_of_iterations (struct ivopts_data *data)
951 struct loop *loop = data->current_loop;
952 edge exit = single_dom_exit (loop);
954 if (!exit)
955 return;
957 number_of_iterations_exit (loop, exit, &loop_data (loop)->niter);
960 /* For each ssa name defined in LOOP determines whether it is an induction
961 variable and if so, its initial value and step. */
963 static bool
964 find_induction_variables (struct ivopts_data *data)
966 unsigned i;
967 struct loop *loop = data->current_loop;
968 bitmap_iterator bi;
970 if (!find_bivs (data))
971 return false;
973 find_givs (data);
974 mark_bivs (data);
975 determine_number_of_iterations (data);
977 if (dump_file && (dump_flags & TDF_DETAILS))
979 if (loop_data (loop)->niter.niter)
981 fprintf (dump_file, " number of iterations ");
982 print_generic_expr (dump_file, loop_data (loop)->niter.niter,
983 TDF_SLIM);
984 fprintf (dump_file, "\n");
986 fprintf (dump_file, " may be zero if ");
987 print_generic_expr (dump_file, loop_data (loop)->niter.may_be_zero,
988 TDF_SLIM);
989 fprintf (dump_file, "\n");
991 fprintf (dump_file, " bogus unless ");
992 print_generic_expr (dump_file, loop_data (loop)->niter.assumptions,
993 TDF_SLIM);
994 fprintf (dump_file, "\n");
995 fprintf (dump_file, "\n");
998 fprintf (dump_file, "Induction variables:\n\n");
1000 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1002 if (ver_info (data, i)->iv)
1003 dump_iv (dump_file, ver_info (data, i)->iv);
1007 return true;
1010 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1012 static struct iv_use *
1013 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1014 tree stmt, enum use_type use_type)
1016 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1018 use->id = n_iv_uses (data);
1019 use->type = use_type;
1020 use->iv = iv;
1021 use->stmt = stmt;
1022 use->op_p = use_p;
1023 use->related_cands = BITMAP_XMALLOC ();
1025 /* Clear the ssa name so that it is not shown in the dumps, in case the
1026 caller did not reset it already. */
1027 iv->ssa_name = NULL_TREE;
1029 if (dump_file && (dump_flags & TDF_DETAILS))
1030 dump_use (dump_file, use);
1032 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
1034 return use;
1037 /* Checks whether OP is a loop-level invariant and if so, records it.
1038 NONLINEAR_USE is true if the invariant is used in a way we do not
1039 handle specially. */
1041 static void
1042 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1044 basic_block bb;
1045 struct version_info *info;
1047 if (TREE_CODE (op) != SSA_NAME
1048 || !is_gimple_reg (op))
1049 return;
1051 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1052 if (bb
1053 && flow_bb_inside_loop_p (data->current_loop, bb))
1054 return;
1056 info = name_info (data, op);
1057 info->name = op;
1058 info->has_nonlin_use |= nonlinear_use;
1059 if (!info->inv_id)
1060 info->inv_id = ++data->max_inv_id;
1061 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1064 /* Checks whether the use OP is interesting and if so, records it
1065 as TYPE. */
1067 static struct iv_use *
1068 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1069 enum use_type type)
1071 struct iv *iv;
1072 struct iv *civ;
1073 tree stmt;
1074 struct iv_use *use;
1076 if (TREE_CODE (op) != SSA_NAME)
1077 return NULL;
1079 iv = get_iv (data, op);
1080 if (!iv)
1081 return NULL;
1083 if (iv->have_use_for)
1085 use = iv_use (data, iv->use_id);
1087 gcc_assert (use->type == USE_NONLINEAR_EXPR
1088 || use->type == USE_OUTER);
1090 if (type == USE_NONLINEAR_EXPR)
1091 use->type = USE_NONLINEAR_EXPR;
1092 return use;
1095 if (zero_p (iv->step))
1097 record_invariant (data, op, true);
1098 return NULL;
1100 iv->have_use_for = true;
1102 civ = xmalloc (sizeof (struct iv));
1103 *civ = *iv;
1105 stmt = SSA_NAME_DEF_STMT (op);
1106 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1107 || TREE_CODE (stmt) == MODIFY_EXPR);
1109 use = record_use (data, NULL, civ, stmt, type);
1110 iv->use_id = use->id;
1112 return use;
1115 /* Checks whether the use OP is interesting and if so, records it. */
1117 static struct iv_use *
1118 find_interesting_uses_op (struct ivopts_data *data, tree op)
1120 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1123 /* Records a definition of induction variable OP that is used outside of the
1124 loop. */
1126 static struct iv_use *
1127 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1129 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1132 /* Checks whether the condition *COND_P in STMT is interesting
1133 and if so, records it. */
1135 static void
1136 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1138 tree *op0_p;
1139 tree *op1_p;
1140 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1141 struct iv const_iv;
1142 tree zero = integer_zero_node;
1144 const_iv.step = NULL_TREE;
1146 if (integer_zerop (*cond_p)
1147 || integer_nonzerop (*cond_p))
1148 return;
1150 if (TREE_CODE (*cond_p) == SSA_NAME)
1152 op0_p = cond_p;
1153 op1_p = &zero;
1155 else
1157 op0_p = &TREE_OPERAND (*cond_p, 0);
1158 op1_p = &TREE_OPERAND (*cond_p, 1);
1161 if (TREE_CODE (*op0_p) == SSA_NAME)
1162 iv0 = get_iv (data, *op0_p);
1163 else
1164 iv0 = &const_iv;
1166 if (TREE_CODE (*op1_p) == SSA_NAME)
1167 iv1 = get_iv (data, *op1_p);
1168 else
1169 iv1 = &const_iv;
1171 if (/* When comparing with non-invariant value, we cannot do any useful
1172 induction variable elimination. */
1173 (!iv0 || !iv1)
1174 /* Eliminating condition based on two ivs would be nontrivial.
1175 ??? TODO -- it is not really important to handle this case. */
1176 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1178 find_interesting_uses_op (data, *op0_p);
1179 find_interesting_uses_op (data, *op1_p);
1180 return;
1183 if (zero_p (iv0->step) && zero_p (iv1->step))
1185 /* If both are invariants, this is a work for unswitching. */
1186 return;
1189 civ = xmalloc (sizeof (struct iv));
1190 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1191 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1194 /* Returns true if expression EXPR is obviously invariant in LOOP,
1195 i.e. if all its operands are defined outside of the LOOP. */
1197 bool
1198 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1200 basic_block def_bb;
1201 unsigned i, len;
1203 if (is_gimple_min_invariant (expr))
1204 return true;
1206 if (TREE_CODE (expr) == SSA_NAME)
1208 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1209 if (def_bb
1210 && flow_bb_inside_loop_p (loop, def_bb))
1211 return false;
1213 return true;
1216 if (!EXPR_P (expr))
1217 return false;
1219 len = first_rtl_op (TREE_CODE (expr));
1220 for (i = 0; i < len; i++)
1221 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1222 return false;
1224 return true;
1227 /* Cumulates the steps of indices into DATA and replaces their values with the
1228 initial ones. Returns false when the value of the index cannot be determined.
1229 Callback for for_each_index. */
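/* For instance (illustrative), for an access a[i] whose index i is an
   induction variable with base 0 and step 1, and whose array elements are
   4 bytes wide, the index is replaced by 0 and 4 is added to the step
   accumulated in DATA.  */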
1231 struct ifs_ivopts_data
1233 struct ivopts_data *ivopts_data;
1234 tree stmt;
1235 tree *step_p;
1238 static bool
1239 idx_find_step (tree base, tree *idx, void *data)
1241 struct ifs_ivopts_data *dta = data;
1242 struct iv *iv;
1243 tree step, type, iv_type, iv_step, lbound, off;
1244 struct loop *loop = dta->ivopts_data->current_loop;
1246 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1247 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1248 return false;
1250 /* If base is a component ref, require that the offset of the reference
1251 is invariant. */
1252 if (TREE_CODE (base) == COMPONENT_REF)
1254 off = component_ref_field_offset (base);
1255 return expr_invariant_in_loop_p (loop, off);
1258 /* If base is array, first check whether we will be able to move the
1259 reference out of the loop (in order to take its address in strength
1260 reduction). In order for this to work we need both lower bound
1261 and step to be loop invariants. */
1262 if (TREE_CODE (base) == ARRAY_REF)
1264 step = array_ref_element_size (base);
1265 lbound = array_ref_low_bound (base);
1267 if (!expr_invariant_in_loop_p (loop, step)
1268 || !expr_invariant_in_loop_p (loop, lbound))
1269 return false;
1272 if (TREE_CODE (*idx) != SSA_NAME)
1273 return true;
1275 iv = get_iv (dta->ivopts_data, *idx);
1276 if (!iv)
1277 return false;
1279 *idx = iv->base;
1281 if (!iv->step)
1282 return true;
1284 iv_type = TREE_TYPE (iv->base);
1285 type = build_pointer_type (TREE_TYPE (base));
1286 if (TREE_CODE (base) == ARRAY_REF)
1288 step = array_ref_element_size (base);
1290 /* We only handle addresses whose step is an integer constant. */
1291 if (TREE_CODE (step) != INTEGER_CST)
1292 return false;
1294 else
1295 /* The step for pointer arithmetic is already 1 byte. */
1296 step = build_int_cst (type, 1);
1298 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1299 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1300 type, iv->base, iv->step, dta->stmt);
1301 else
1302 iv_step = fold_convert (iv_type, iv->step);
1304 if (!iv_step)
1306 /* The index might wrap. */
1307 return false;
1310 step = EXEC_BINARY (MULT_EXPR, type, step, iv_step);
1312 if (!*dta->step_p)
1313 *dta->step_p = step;
1314 else
1315 *dta->step_p = EXEC_BINARY (PLUS_EXPR, type, *dta->step_p, step);
1317 return true;
1320 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1321 object is passed to it in DATA. */
1323 static bool
1324 idx_record_use (tree base, tree *idx,
1325 void *data)
1327 find_interesting_uses_op (data, *idx);
1328 if (TREE_CODE (base) == ARRAY_REF)
1330 find_interesting_uses_op (data, array_ref_element_size (base));
1331 find_interesting_uses_op (data, array_ref_low_bound (base));
1333 return true;
1336 /* Finds addresses in *OP_P inside STMT. */
1338 static void
1339 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1341 tree base = unshare_expr (*op_p), step = NULL;
1342 struct iv *civ;
1343 struct ifs_ivopts_data ifs_ivopts_data;
1345 /* Ignore bitfields for now. Not really something terribly complicated
1346 to handle. TODO. */
1347 if (TREE_CODE (base) == COMPONENT_REF
1348 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1349 goto fail;
1351 ifs_ivopts_data.ivopts_data = data;
1352 ifs_ivopts_data.stmt = stmt;
1353 ifs_ivopts_data.step_p = &step;
1354 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1355 || zero_p (step))
1356 goto fail;
1358 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1359 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1361 if (TREE_CODE (base) == INDIRECT_REF)
1362 base = TREE_OPERAND (base, 0);
1363 else
1364 base = build_addr (base);
1366 civ = alloc_iv (base, step);
1367 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1368 return;
1370 fail:
1371 for_each_index (op_p, idx_record_use, data);
1374 /* Finds and records invariants used in STMT. */
1376 static void
1377 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1379 use_optype uses = NULL;
1380 unsigned i, n;
1381 tree op;
1383 if (TREE_CODE (stmt) == PHI_NODE)
1384 n = PHI_NUM_ARGS (stmt);
1385 else
1387 get_stmt_operands (stmt);
1388 uses = STMT_USE_OPS (stmt);
1389 n = NUM_USES (uses);
1392 for (i = 0; i < n; i++)
1394 if (TREE_CODE (stmt) == PHI_NODE)
1395 op = PHI_ARG_DEF (stmt, i);
1396 else
1397 op = USE_OP (uses, i);
1399 record_invariant (data, op, false);
1403 /* Finds interesting uses of induction variables in the statement STMT. */
1405 static void
1406 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1408 struct iv *iv;
1409 tree op, lhs, rhs;
1410 use_optype uses = NULL;
1411 unsigned i, n;
1413 find_invariants_stmt (data, stmt);
1415 if (TREE_CODE (stmt) == COND_EXPR)
1417 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1418 return;
1421 if (TREE_CODE (stmt) == MODIFY_EXPR)
1423 lhs = TREE_OPERAND (stmt, 0);
1424 rhs = TREE_OPERAND (stmt, 1);
1426 if (TREE_CODE (lhs) == SSA_NAME)
1428 /* If the statement defines an induction variable, the uses are not
1429 interesting by themselves. */
1431 iv = get_iv (data, lhs);
1433 if (iv && !zero_p (iv->step))
1434 return;
1437 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1439 case tcc_comparison:
1440 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1441 return;
1443 case tcc_reference:
1444 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1445 if (REFERENCE_CLASS_P (lhs))
1446 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1447 return;
1449 default: ;
1452 if (REFERENCE_CLASS_P (lhs)
1453 && is_gimple_val (rhs))
1455 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1456 find_interesting_uses_op (data, rhs);
1457 return;
1460 /* TODO -- we should also handle address uses of type
1462 memory = call (whatever);
1466 call (memory). */
1469 if (TREE_CODE (stmt) == PHI_NODE
1470 && bb_for_stmt (stmt) == data->current_loop->header)
1472 lhs = PHI_RESULT (stmt);
1473 iv = get_iv (data, lhs);
1475 if (iv && !zero_p (iv->step))
1476 return;
1479 if (TREE_CODE (stmt) == PHI_NODE)
1480 n = PHI_NUM_ARGS (stmt);
1481 else
1483 uses = STMT_USE_OPS (stmt);
1484 n = NUM_USES (uses);
1487 for (i = 0; i < n; i++)
1489 if (TREE_CODE (stmt) == PHI_NODE)
1490 op = PHI_ARG_DEF (stmt, i);
1491 else
1492 op = USE_OP (uses, i);
1494 if (TREE_CODE (op) != SSA_NAME)
1495 continue;
1497 iv = get_iv (data, op);
1498 if (!iv)
1499 continue;
1501 find_interesting_uses_op (data, op);
1505 /* Finds interesting uses of induction variables outside of loops
1506 on loop exit edge EXIT. */
1508 static void
1509 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1511 tree phi, def;
1513 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
1515 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1516 find_interesting_uses_outer (data, def);
1520 /* Finds uses of the induction variables that are interesting. */
1522 static void
1523 find_interesting_uses (struct ivopts_data *data)
1525 basic_block bb;
1526 block_stmt_iterator bsi;
1527 tree phi;
1528 basic_block *body = get_loop_body (data->current_loop);
1529 unsigned i;
1530 struct version_info *info;
1531 edge e;
1533 if (dump_file && (dump_flags & TDF_DETAILS))
1534 fprintf (dump_file, "Uses:\n\n");
1536 for (i = 0; i < data->current_loop->num_nodes; i++)
1538 edge_iterator ei;
1539 bb = body[i];
1541 FOR_EACH_EDGE (e, ei, bb->succs)
1542 if (e->dest != EXIT_BLOCK_PTR
1543 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1544 find_interesting_uses_outside (data, e);
1546 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
1547 find_interesting_uses_stmt (data, phi);
1548 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1549 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1552 if (dump_file && (dump_flags & TDF_DETAILS))
1554 bitmap_iterator bi;
1556 fprintf (dump_file, "\n");
1558 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1560 info = ver_info (data, i);
1561 if (info->inv_id)
1563 fprintf (dump_file, " ");
1564 print_generic_expr (dump_file, info->name, TDF_SLIM);
1565 fprintf (dump_file, " is invariant (%d)%s\n",
1566 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1570 fprintf (dump_file, "\n");
1573 free (body);
1576 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1577 position to POS. If USE is not NULL, the candidate is set as related to
1578 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1579 replacement of the final value of the iv by a direct computation. */
1581 static struct iv_cand *
1582 add_candidate_1 (struct ivopts_data *data,
1583 tree base, tree step, bool important, enum iv_position pos,
1584 struct iv_use *use, tree incremented_at)
1586 unsigned i;
1587 struct iv_cand *cand = NULL;
1588 tree type;
1590 if (base)
1592 type = TREE_TYPE (base);
1593 if (!TYPE_UNSIGNED (type))
1595 type = unsigned_type_for (type);
1596 base = fold_convert (type, base);
1597 if (step)
1598 step = fold_convert (type, step);
1602 for (i = 0; i < n_iv_cands (data); i++)
1604 cand = iv_cand (data, i);
1606 if (cand->pos != pos)
1607 continue;
1609 if (cand->incremented_at != incremented_at)
1610 continue;
1612 if (!cand->iv)
1614 if (!base && !step)
1615 break;
1617 continue;
1620 if (!base && !step)
1621 continue;
1623 if (!operand_equal_p (base, cand->iv->base, 0))
1624 continue;
1626 if (zero_p (cand->iv->step))
1628 if (zero_p (step))
1629 break;
1631 else
1633 if (step && operand_equal_p (step, cand->iv->step, 0))
1634 break;
1638 if (i == n_iv_cands (data))
1640 cand = xcalloc (1, sizeof (struct iv_cand));
1641 cand->id = i;
1643 if (!base && !step)
1644 cand->iv = NULL;
1645 else
1646 cand->iv = alloc_iv (base, step);
1648 cand->pos = pos;
1649 if (pos != IP_ORIGINAL && cand->iv)
1651 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1652 cand->var_after = cand->var_before;
1654 cand->important = important;
1655 cand->incremented_at = incremented_at;
1656 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1658 if (dump_file && (dump_flags & TDF_DETAILS))
1659 dump_cand (dump_file, cand);
1662 if (important && !cand->important)
1664 cand->important = true;
1665 if (dump_file && (dump_flags & TDF_DETAILS))
1666 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1669 if (use)
1671 bitmap_set_bit (use->related_cands, i);
1672 if (dump_file && (dump_flags & TDF_DETAILS))
1673 fprintf (dump_file, "Candidate %d is related to use %d\n",
1674 cand->id, use->id);
1677 return cand;
1680 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1681 position to POS. If USE is not NULL, the candidate is set as related to
1682 it. The candidate computation is scheduled on all available positions. */
1684 static void
1685 add_candidate (struct ivopts_data *data,
1686 tree base, tree step, bool important, struct iv_use *use)
1688 if (ip_normal_pos (data->current_loop))
1689 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
1690 if (ip_end_pos (data->current_loop))
1691 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
1694 /* Adds standard iv candidates. */
1696 static void
1697 add_standard_iv_candidates (struct ivopts_data *data)
1699 /* Add 0 + 1 * iteration candidate. */
1700 add_candidate (data,
1701 build_int_cst (unsigned_intSI_type_node, 0),
1702 build_int_cst (unsigned_intSI_type_node, 1),
1703 true, NULL);
1705 /* The same for a long type if it is still fast enough. */
1706 if (BITS_PER_WORD > 32)
1707 add_candidate (data,
1708 build_int_cst (unsigned_intDI_type_node, 0),
1709 build_int_cst (unsigned_intDI_type_node, 1),
1710 true, NULL);
1714 /* Adds candidates based on the old induction variable IV. */
1716 static void
1717 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
1719 tree phi, def;
1720 struct iv_cand *cand;
1722 add_candidate (data, iv->base, iv->step, true, NULL);
1724 /* The same, but with initial value zero. */
1725 add_candidate (data,
1726 build_int_cst (TREE_TYPE (iv->base), 0),
1727 iv->step, true, NULL);
1729 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
1730 if (TREE_CODE (phi) == PHI_NODE)
1732 /* Additionally record the possibility of leaving the original iv
1733 untouched. */
1734 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
1735 cand = add_candidate_1 (data,
1736 iv->base, iv->step, true, IP_ORIGINAL, NULL,
1737 SSA_NAME_DEF_STMT (def));
1738 cand->var_before = iv->ssa_name;
1739 cand->var_after = def;
1743 /* Adds candidates based on the old induction variables. */
1745 static void
1746 add_old_ivs_candidates (struct ivopts_data *data)
1748 unsigned i;
1749 struct iv *iv;
1750 bitmap_iterator bi;
1752 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1754 iv = ver_info (data, i)->iv;
1755 if (iv && iv->biv_p && !zero_p (iv->step))
1756 add_old_iv_candidates (data, iv);
1760 /* Adds candidates based on the value of the induction variable IV and USE. */
1762 static void
1763 add_iv_value_candidates (struct ivopts_data *data,
1764 struct iv *iv, struct iv_use *use)
1766 add_candidate (data, iv->base, iv->step, false, use);
1768 /* The same, but with initial value zero. */
1769 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
1770 iv->step, false, use);
1773 /* Adds candidates based on the address IV and USE. */
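/* As an illustrative example (p is hypothetical): if the address iv had base
   p + 16, with p loop invariant, and step 4, the code below would add
   candidates with bases p + 16, 0 and p, all with step 4.  */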
1775 static void
1776 add_address_candidates (struct ivopts_data *data,
1777 struct iv *iv, struct iv_use *use)
1779 tree base, abase, tmp, *act;
1781 /* First, the trivial choices. */
1782 add_iv_value_candidates (data, iv, use);
1784 /* Second, try removing the COMPONENT_REFs. */
1785 if (TREE_CODE (iv->base) == ADDR_EXPR)
1787 base = TREE_OPERAND (iv->base, 0);
1788 while (TREE_CODE (base) == COMPONENT_REF
1789 || (TREE_CODE (base) == ARRAY_REF
1790 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
1791 base = TREE_OPERAND (base, 0);
1793 if (base != TREE_OPERAND (iv->base, 0))
1795 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1796 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1798 if (TREE_CODE (base) == INDIRECT_REF)
1799 base = TREE_OPERAND (base, 0);
1800 else
1801 base = build_addr (base);
1802 add_candidate (data, base, iv->step, false, use);
1806 /* Third, try removing the constant offset. */
1807 abase = iv->base;
1808 while (TREE_CODE (abase) == PLUS_EXPR
1809 && TREE_CODE (TREE_OPERAND (abase, 1)) != INTEGER_CST)
1810 abase = TREE_OPERAND (abase, 0);
1811 /* We found the offset, so make the copy of the non-shared part and
1812 remove it. */
1813 if (TREE_CODE (abase) == PLUS_EXPR)
1815 tmp = iv->base;
1816 act = &base;
1818 for (tmp = iv->base; tmp != abase; tmp = TREE_OPERAND (tmp, 0))
1820 *act = build2 (PLUS_EXPR, TREE_TYPE (tmp),
1821 NULL_TREE, TREE_OPERAND (tmp, 1));
1822 act = &TREE_OPERAND (*act, 0);
1824 *act = TREE_OPERAND (tmp, 0);
1826 add_candidate (data, base, iv->step, false, use);
1830 /* Possibly adds pseudocandidate for replacing the final value of USE by
1831 a direct computation. */
1833 static void
1834 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
1836 struct tree_niter_desc *niter;
1837 struct loop *loop = data->current_loop;
1839 /* We must know where we exit the loop and how many times it iterates. */
1840 if (!single_dom_exit (loop))
1841 return;
1843 niter = &loop_data (loop)->niter;
1844 if (!niter->niter
1845 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
1846 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
1847 return;
1849 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
1852 /* Adds candidates based on the uses. */
1854 static void
1855 add_derived_ivs_candidates (struct ivopts_data *data)
1857 unsigned i;
1859 for (i = 0; i < n_iv_uses (data); i++)
1861 struct iv_use *use = iv_use (data, i);
1863 if (!use)
1864 continue;
1866 switch (use->type)
1868 case USE_NONLINEAR_EXPR:
1869 case USE_COMPARE:
1870 /* Just add the ivs based on the value of the iv used here. */
1871 add_iv_value_candidates (data, use->iv, use);
1872 break;
1874 case USE_OUTER:
1875 add_iv_value_candidates (data, use->iv, use);
1877 /* Additionally, add the pseudocandidate for the possibility to
1878 replace the final value by a direct computation. */
1879 add_iv_outer_candidates (data, use);
1880 break;
1882 case USE_ADDRESS:
1883 add_address_candidates (data, use->iv, use);
1884 break;
1886 default:
1887 gcc_unreachable ();
1892 /* Finds the candidates for the induction variables. */
1894 static void
1895 find_iv_candidates (struct ivopts_data *data)
1897 /* Add commonly used ivs. */
1898 add_standard_iv_candidates (data);
1900 /* Add old induction variables. */
1901 add_old_ivs_candidates (data);
1903 /* Add induction variables derived from uses. */
1904 add_derived_ivs_candidates (data);
1907 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
1908 If consider_all_candidates is true, we use a two-dimensional array, otherwise
1909 we allocate a simple list for every use. */
1911 static void
1912 alloc_use_cost_map (struct ivopts_data *data)
1914 unsigned i, n_imp = 0, size, j;
1916 if (!data->consider_all_candidates)
1918 for (i = 0; i < n_iv_cands (data); i++)
1920 struct iv_cand *cand = iv_cand (data, i);
1921 if (cand->important)
1922 n_imp++;
1926 for (i = 0; i < n_iv_uses (data); i++)
1928 struct iv_use *use = iv_use (data, i);
1929 bitmap_iterator bi;
1931 if (data->consider_all_candidates)
1933 size = n_iv_cands (data);
1934 use->n_map_members = size;
1936 else
1938 size = n_imp;
1939 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
1941 size++;
1943 use->n_map_members = 0;
1946 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
1950 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
1951 on invariants DEPENDS_ON. */
1953 static void
1954 set_use_iv_cost (struct ivopts_data *data,
1955 struct iv_use *use, struct iv_cand *cand, unsigned cost,
1956 bitmap depends_on)
1958 if (cost == INFTY
1959 && depends_on)
1961 BITMAP_XFREE (depends_on);
1962 depends_on = NULL;
1965 if (data->consider_all_candidates)
1967 use->cost_map[cand->id].cand = cand;
1968 use->cost_map[cand->id].cost = cost;
1969 use->cost_map[cand->id].depends_on = depends_on;
1970 return;
1973 if (cost == INFTY)
1974 return;
1976 use->cost_map[use->n_map_members].cand = cand;
1977 use->cost_map[use->n_map_members].cost = cost;
1978 use->cost_map[use->n_map_members].depends_on = depends_on;
1979 use->n_map_members++;
1982 /* Gets cost of (USE, CANDIDATE) pair. Stores the bitmap of dependencies to
1983 DEPENDS_ON. */
1985 static unsigned
1986 get_use_iv_cost (struct ivopts_data *data,
1987 struct iv_use *use, struct iv_cand *cand, bitmap *depends_on)
1989 unsigned i;
1991 if (!cand)
1992 return INFTY;
1994 if (data->consider_all_candidates)
1995 i = cand->id;
1996 else
1998 for (i = 0; i < use->n_map_members; i++)
1999 if (use->cost_map[i].cand == cand)
2000 break;
2002 if (i == use->n_map_members)
2003 return INFTY;
2006 if (depends_on)
2007 *depends_on = use->cost_map[i].depends_on;
2008 return use->cost_map[i].cost;
2011 /* Returns an estimate of the cost of computing SEQ. */
2013 static unsigned
2014 seq_cost (rtx seq)
2016 unsigned cost = 0;
2017 rtx set;
2019 for (; seq; seq = NEXT_INSN (seq))
2021 set = single_set (seq);
2022 if (set)
2023 cost += rtx_cost (set, SET);
2024 else
2025 cost++;
2028 return cost;
2031 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2032 static rtx
2033 produce_memory_decl_rtl (tree obj, int *regno)
2035 rtx x;
2036 if (!obj)
2037 abort ();
2038 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2040 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2041 x = gen_rtx_SYMBOL_REF (Pmode, name);
2043 else
2044 x = gen_raw_REG (Pmode, (*regno)++);
2046 return gen_rtx_MEM (DECL_MODE (obj), x);
2049 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2050 walk_tree. DATA contains the actual fake register number. */
2052 static tree
2053 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2055 tree obj = NULL_TREE;
2056 rtx x = NULL_RTX;
2057 int *regno = data;
2059 switch (TREE_CODE (*expr_p))
2061 case ADDR_EXPR:
2062 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2063 (handled_component_p (*expr_p)
2064 || TREE_CODE (*expr_p) == REALPART_EXPR
2065 || TREE_CODE (*expr_p) == IMAGPART_EXPR);
2066 expr_p = &TREE_OPERAND (*expr_p, 0));
2067 obj = *expr_p;
2068 if (DECL_P (obj))
2069 x = produce_memory_decl_rtl (obj, regno);
2070 break;
2072 case SSA_NAME:
2073 *ws = 0;
2074 obj = SSA_NAME_VAR (*expr_p);
2075 if (!DECL_RTL_SET_P (obj))
2076 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2077 break;
2079 case VAR_DECL:
2080 case PARM_DECL:
2081 case RESULT_DECL:
2082 *ws = 0;
2083 obj = *expr_p;
2085 if (DECL_RTL_SET_P (obj))
2086 break;
2088 if (DECL_MODE (obj) == BLKmode)
2089 x = produce_memory_decl_rtl (obj, regno);
2090 else
2091 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2093 break;
2095 default:
2096 break;
2099 if (x)
2101 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
2102 SET_DECL_RTL (obj, x);
2105 return NULL_TREE;
2108 /* Determines cost of the computation of EXPR. */
2110 static unsigned
2111 computation_cost (tree expr)
2113 rtx seq, rslt;
2114 tree type = TREE_TYPE (expr);
2115 unsigned cost;
2116 int regno = 0;
2118 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2119 start_sequence ();
2120 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2121 seq = get_insns ();
2122 end_sequence ();
2124 cost = seq_cost (seq);
2125 if (GET_CODE (rslt) == MEM)
2126 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2128 return cost;
2131 /* Returns the variable containing the value of candidate CAND at statement STMT. */
2133 static tree
2134 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2136 if (stmt_after_increment (loop, cand, stmt))
2137 return cand->var_after;
2138 else
2139 return cand->var_before;
2142 /* Determines the expression by which USE is expressed from induction variable
2143 CAND at statement AT in LOOP. */
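/* As a worked example (with made-up values): if the use has base 16 and
   step 4 while the candidate has base 0 and step 1, then ratio = 4 and the
   value of the use is computed as 16 + 4 * (var - 0), following the
   use = ubase + ratio * (var - cbase) scheme used below, with var taken
   before or after the increment of the candidate depending on AT.  */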
2145 static tree
2146 get_computation_at (struct loop *loop,
2147 struct iv_use *use, struct iv_cand *cand, tree at)
2149 tree ubase = use->iv->base;
2150 tree ustep = use->iv->step;
2151 tree cbase = cand->iv->base;
2152 tree cstep = cand->iv->step;
2153 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2154 tree uutype;
2155 tree expr, delta;
2156 tree ratio;
2157 unsigned HOST_WIDE_INT ustepi, cstepi;
2158 HOST_WIDE_INT ratioi;
2160 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2162 /* We do not have enough precision to express the values of the use. */
2163 return NULL_TREE;
2166 expr = var_at_stmt (loop, cand, at);
2168 if (TREE_TYPE (expr) != ctype)
2170 /* This may happen with the original ivs. */
2171 expr = fold_convert (ctype, expr);
2174 if (TYPE_UNSIGNED (utype))
2175 uutype = utype;
2176 else
2178 uutype = unsigned_type_for (utype);
2179 ubase = fold_convert (uutype, ubase);
2180 ustep = fold_convert (uutype, ustep);
2183 if (uutype != ctype)
2185 expr = fold_convert (uutype, expr);
2186 cbase = fold_convert (uutype, cbase);
2187 cstep = fold_convert (uutype, cstep);
2190 if (!cst_and_fits_in_hwi (cstep)
2191 || !cst_and_fits_in_hwi (ustep))
2192 return NULL_TREE;
2194 ustepi = int_cst_value (ustep);
2195 cstepi = int_cst_value (cstep);
2197 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2199 /* TODO maybe consider case when ustep divides cstep and the ratio is
2200 a power of 2 (so that the division is fast to execute)? We would
2201 need to be much more careful with overflows etc. then. */
2202 return NULL_TREE;
2205 /* We may need to shift the value if we are after the increment. */
2206 if (stmt_after_increment (loop, cand, at))
2207 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2209 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2210 or |ratio| == 1, it is better to handle this like
2212 ubase - ratio * cbase + ratio * var. */
2214 if (ratioi == 1)
2216 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2217 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2219 else if (ratioi == -1)
2221 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2222 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2224 else if (TREE_CODE (cbase) == INTEGER_CST)
2226 ratio = build_int_cst_type (uutype, ratioi);
2227 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2228 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2229 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2230 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2232 else
2234 expr = fold (build2 (MINUS_EXPR, uutype, expr, cbase));
2235 ratio = build_int_cst_type (uutype, ratioi);
2236 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2237 expr = fold (build2 (PLUS_EXPR, uutype, ubase, expr));
2240 return fold_convert (utype, expr);
2243 /* Determines the expression by which USE is expressed from induction variable
2244 CAND in LOOP. */
2246 static tree
2247 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2249 return get_computation_at (loop, use, cand, use->stmt);
2252 /* Strips constant offsets from EXPR and adds them to OFFSET. */
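/* For example (illustrative): for EXPR == (i + 8) - 3 the loop below ends
   with EXPR == i and *OFFSET increased by 5; a constant standing alone is
   replaced by integer_zero_node and its value is added to *OFFSET.  */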
2254 static void
2255 strip_offset (tree *expr, unsigned HOST_WIDE_INT *offset)
2257 tree op0, op1;
2258 enum tree_code code;
2260 while (1)
2262 if (cst_and_fits_in_hwi (*expr))
2264 *offset += int_cst_value (*expr);
2265 *expr = integer_zero_node;
2266 return;
2269 code = TREE_CODE (*expr);
2271 if (code != PLUS_EXPR && code != MINUS_EXPR)
2272 return;
2274 op0 = TREE_OPERAND (*expr, 0);
2275 op1 = TREE_OPERAND (*expr, 1);
2277 if (cst_and_fits_in_hwi (op1))
2279 if (code == PLUS_EXPR)
2280 *offset += int_cst_value (op1);
2281 else
2282 *offset -= int_cst_value (op1);
2284 *expr = op0;
2285 continue;
2288 if (code != PLUS_EXPR)
2289 return;
2291 if (!cst_and_fits_in_hwi (op0))
2292 return;
2294 *offset += int_cst_value (op0);
2295 *expr = op1;
2299 /* Returns cost of addition in MODE. */
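/* The cost is estimated by expanding a register-register addition in MODE
   through force_operand and summing the costs of the generated insns;
   the result is cached per mode. */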
2301 static unsigned
2302 add_cost (enum machine_mode mode)
2304 static unsigned costs[NUM_MACHINE_MODES];
2305 rtx seq;
2306 unsigned cost;
2308 if (costs[mode])
2309 return costs[mode];
2311 start_sequence ();
2312 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2313 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2314 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2315 NULL_RTX);
2316 seq = get_insns ();
2317 end_sequence ();
2319 cost = seq_cost (seq);
2320 if (!cost)
2321 cost = 1;
2323 costs[mode] = cost;
2325 if (dump_file && (dump_flags & TDF_DETAILS))
2326 fprintf (dump_file, "Addition in %s costs %d\n",
2327 GET_MODE_NAME (mode), cost);
2328 return cost;
2331 /* Entry in a hashtable of already known costs for multiplication. */
2332 struct mbc_entry
2334 HOST_WIDE_INT cst; /* The constant to multiply by. */
2335 enum machine_mode mode; /* In mode. */
2336 unsigned cost; /* The cost. */
2339 /* Computes the hash value for ENTRY. */
2341 static hashval_t
2342 mbc_entry_hash (const void *entry)
2344 const struct mbc_entry *e = entry;
2346 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2349 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2351 static int
2352 mbc_entry_eq (const void *entry1, const void *entry2)
2354 const struct mbc_entry *e1 = entry1;
2355 const struct mbc_entry *e2 = entry2;
2357 return (e1->mode == e2->mode
2358 && e1->cst == e2->cst);
2361 /* Returns cost of multiplication by constant CST in MODE. */
2363 static unsigned
2364 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2366 static htab_t costs;
2367 struct mbc_entry **cached, act;
2368 rtx seq;
2369 unsigned cost;
2371 if (!costs)
2372 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2374 act.mode = mode;
2375 act.cst = cst;
2376 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2377 if (*cached)
2378 return (*cached)->cost;
2380 *cached = xmalloc (sizeof (struct mbc_entry));
2381 (*cached)->mode = mode;
2382 (*cached)->cst = cst;
2384 start_sequence ();
2385 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2386 NULL_RTX, 0);
2387 seq = get_insns ();
2388 end_sequence ();
2390 cost = seq_cost (seq);
2392 if (dump_file && (dump_flags & TDF_DETAILS))
2393 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2394 (int) cst, GET_MODE_NAME (mode), cost);
2396 (*cached)->cost = cost;
2398 return cost;
2401 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2402 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2403 variable is omitted. The created memory access is in Pmode.
2405 TODO -- there must be some better way. This all is quite crude. */
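/* For illustration, on a target with i386-like addressing the most general
   form corresponds to something like symbol + base + 4 * index + 12;
   simpler targets may reject the symbol, the offset or the multiplier,
   in which case the extra additions and multiplications are costed here. */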
2407 static unsigned
2408 get_address_cost (bool symbol_present, bool var_present,
2409 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2411 #define MAX_RATIO 128
2412 static sbitmap valid_mult;
2413 static HOST_WIDE_INT rat, off;
2414 static HOST_WIDE_INT min_offset, max_offset;
2415 static unsigned costs[2][2][2][2];
2416 unsigned cost, acost;
2417 rtx seq, addr, base;
2418 bool offset_p, ratio_p;
2419 rtx reg1;
2420 HOST_WIDE_INT s_offset;
2421 unsigned HOST_WIDE_INT mask;
2422 unsigned bits;
2424 if (!valid_mult)
2426 HOST_WIDE_INT i;
2428 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2430 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2431 for (i = 1; i <= 1 << 20; i <<= 1)
2433 XEXP (addr, 1) = GEN_INT (i);
2434 if (!memory_address_p (Pmode, addr))
2435 break;
2437 max_offset = i >> 1;
2438 off = max_offset;
2440 for (i = 1; i <= 1 << 20; i <<= 1)
2442 XEXP (addr, 1) = GEN_INT (-i);
2443 if (!memory_address_p (Pmode, addr))
2444 break;
2446 min_offset = -(i >> 1);
2448 if (dump_file && (dump_flags & TDF_DETAILS))
2450 fprintf (dump_file, "get_address_cost:\n");
2451 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2452 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2455 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2456 sbitmap_zero (valid_mult);
2457 rat = 1;
2458 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2459 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2461 XEXP (addr, 1) = GEN_INT (i);
2462 if (memory_address_p (Pmode, addr))
2464 SET_BIT (valid_mult, i + MAX_RATIO);
2465 rat = i;
2469 if (dump_file && (dump_flags & TDF_DETAILS))
2471 fprintf (dump_file, " allowed multipliers:");
2472 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2473 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2474 fprintf (dump_file, " %d", (int) i);
2475 fprintf (dump_file, "\n");
2476 fprintf (dump_file, "\n");
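/* Sign-extend OFFSET from the width of Pmode, so that negative offsets
   passed in as large unsigned values are classified correctly below. */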
2480 bits = GET_MODE_BITSIZE (Pmode);
2481 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2482 offset &= mask;
2483 if ((offset >> (bits - 1) & 1))
2484 offset |= ~mask;
2485 s_offset = offset;
2487 cost = 0;
2488 offset_p = (min_offset <= s_offset && s_offset <= max_offset);
2489 ratio_p = (ratio != 1
2490 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2491 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2493 if (ratio != 1 && !ratio_p)
2494 cost += multiply_by_cost (ratio, Pmode);
2496 if (s_offset && !offset_p && !symbol_present)
2498 cost += add_cost (Pmode);
2499 var_present = true;
2502 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2503 if (!acost)
2505 acost = 0;
2507 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2508 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2509 if (ratio_p)
2510 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2512 if (symbol_present)
2514 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2515 if (offset_p)
2516 base = gen_rtx_fmt_e (CONST, Pmode,
2517 gen_rtx_fmt_ee (PLUS, Pmode,
2518 base,
2519 GEN_INT (off)));
2520 if (var_present)
2521 base = gen_rtx_fmt_ee (PLUS, Pmode, reg1, base);
2524 else if (var_present)
2526 base = reg1;
2527 if (offset_p)
2528 base = gen_rtx_fmt_ee (PLUS, Pmode, base, GEN_INT (off));
2530 else if (offset_p)
2531 base = GEN_INT (off);
2532 else
2533 base = NULL_RTX;
2535 if (base)
2536 addr = gen_rtx_fmt_ee (PLUS, Pmode, base, addr);
2538 start_sequence ();
2539 addr = memory_address (Pmode, addr);
2540 seq = get_insns ();
2541 end_sequence ();
2543 acost = seq_cost (seq);
2544 acost += address_cost (addr, Pmode);
2546 if (!acost)
2547 acost = 1;
2548 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2551 return cost + acost;
2554 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2555 the bitmap in which we should store them. */
2557 static struct ivopts_data *fd_ivopts_data;
2558 static tree
2559 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2561 bitmap *depends_on = data;
2562 struct version_info *info;
2564 if (TREE_CODE (*expr_p) != SSA_NAME)
2565 return NULL_TREE;
2566 info = name_info (fd_ivopts_data, *expr_p);
2568 if (!info->inv_id || info->has_nonlin_use)
2569 return NULL_TREE;
2571 if (!*depends_on)
2572 *depends_on = BITMAP_XMALLOC ();
2573 bitmap_set_bit (*depends_on, info->inv_id);
2575 return NULL_TREE;
2578 /* Estimates the cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
2579 invariants the computation depends on. */
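/* For example (hypothetical operands): a plain INTEGER_CST such as 42 costs
   integer_cost, the address of a global variable costs symbol_cost, that
   address plus a constant costs address_cost, and anything non-invariant
   falls back to target_spill_cost. */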
2581 static unsigned
2582 force_var_cost (struct ivopts_data *data,
2583 tree expr, bitmap *depends_on)
2585 static bool costs_initialized = false;
2586 static unsigned integer_cost;
2587 static unsigned symbol_cost;
2588 static unsigned address_cost;
2590 if (!costs_initialized)
2592 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2593 rtx x = gen_rtx_MEM (DECL_MODE (var),
2594 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2595 tree addr;
2596 tree type = build_pointer_type (integer_type_node);
2598 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2599 2000));
2601 SET_DECL_RTL (var, x);
2602 TREE_STATIC (var) = 1;
2603 addr = build1 (ADDR_EXPR, type, var);
2604 symbol_cost = computation_cost (addr) + 1;
2606 address_cost
2607 = computation_cost (build2 (PLUS_EXPR, type,
2608 addr,
2609 build_int_cst_type (type, 2000))) + 1;
2610 if (dump_file && (dump_flags & TDF_DETAILS))
2612 fprintf (dump_file, "force_var_cost:\n");
2613 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2614 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2615 fprintf (dump_file, " address %d\n", (int) address_cost);
2616 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2617 fprintf (dump_file, "\n");
2620 costs_initialized = true;
2623 if (depends_on)
2625 fd_ivopts_data = data;
2626 walk_tree (&expr, find_depends, depends_on, NULL);
2629 if (SSA_VAR_P (expr))
2630 return 0;
2632 if (TREE_INVARIANT (expr))
2634 if (TREE_CODE (expr) == INTEGER_CST)
2635 return integer_cost;
2637 if (TREE_CODE (expr) == ADDR_EXPR)
2639 tree obj = TREE_OPERAND (expr, 0);
2641 if (TREE_CODE (obj) == VAR_DECL
2642 || TREE_CODE (obj) == PARM_DECL
2643 || TREE_CODE (obj) == RESULT_DECL)
2644 return symbol_cost;
2647 return address_cost;
2650 /* Just an arbitrary value, FIXME. */
2651 return target_spill_cost;
2654 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2655 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2656 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2657 invariants the computation depends on. */
2659 static unsigned
2660 split_address_cost (struct ivopts_data *data,
2661 tree addr, bool *symbol_present, bool *var_present,
2662 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2664 tree core;
2665 HOST_WIDE_INT bitsize;
2666 HOST_WIDE_INT bitpos;
2667 tree toffset;
2668 enum machine_mode mode;
2669 int unsignedp, volatilep;
2671 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
2672 &unsignedp, &volatilep);
2674 if (toffset != 0
2675 || bitpos % BITS_PER_UNIT != 0
2676 || TREE_CODE (core) != VAR_DECL)
2678 *symbol_present = false;
2679 *var_present = true;
2680 fd_ivopts_data = data;
2681 walk_tree (&addr, find_depends, depends_on, NULL);
2682 return target_spill_cost;
2685 *offset += bitpos / BITS_PER_UNIT;
2686 if (TREE_STATIC (core)
2687 || DECL_EXTERNAL (core))
2689 *symbol_present = true;
2690 *var_present = false;
2691 return 0;
2694 *symbol_present = false;
2695 *var_present = true;
2696 return 0;
2699 /* Estimates cost of expressing difference of addresses E1 - E2 as
2700 var + symbol + offset. The value of offset is added to OFFSET,
2701 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2702 part is missing. DEPENDS_ON is a set of the invariants the computation
2703 depends on. */
2705 static unsigned
2706 ptr_difference_cost (struct ivopts_data *data,
2707 tree e1, tree e2, bool *symbol_present, bool *var_present,
2708 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2710 HOST_WIDE_INT diff = 0;
2711 unsigned cost;
2713 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
2715 if (TREE_CODE (e2) == ADDR_EXPR
2716 && ptr_difference_const (TREE_OPERAND (e1, 0),
2717 TREE_OPERAND (e2, 0), &diff))
2719 *offset += diff;
2720 *symbol_present = false;
2721 *var_present = false;
2722 return 0;
2725 if (e2 == integer_zero_node)
2726 return split_address_cost (data, TREE_OPERAND (e1, 0),
2727 symbol_present, var_present, offset, depends_on);
2729 *symbol_present = false;
2730 *var_present = true;
2732 cost = force_var_cost (data, e1, depends_on);
2733 cost += force_var_cost (data, e2, depends_on);
2734 cost += add_cost (Pmode);
2736 return cost;
2739 /* Estimates cost of expressing difference E1 - E2 as
2740 var + symbol + offset. The value of offset is added to OFFSET,
2741 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2742 part is missing. DEPENDS_ON is a set of the invariants the computation
2743 depends on. */
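/* For example (hypothetical operands), for E1 = &a[4] and E2 = &a[1] with
   4-byte elements the whole difference folds into *OFFSET (12, via
   ptr_difference_cost), whereas two unrelated variables cost two
   force_var_cost computations plus an addition. */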
2745 static unsigned
2746 difference_cost (struct ivopts_data *data,
2747 tree e1, tree e2, bool *symbol_present, bool *var_present,
2748 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2750 unsigned cost;
2751 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
2753 strip_offset (&e1, offset);
2754 *offset = -*offset;
2755 strip_offset (&e2, offset);
2756 *offset = -*offset;
2758 if (TREE_CODE (e1) == ADDR_EXPR)
2759 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
2760 depends_on);
2761 *symbol_present = false;
2763 if (operand_equal_p (e1, e2, 0))
2765 *var_present = false;
2766 return 0;
2768 *var_present = true;
2769 if (zero_p (e2))
2770 return force_var_cost (data, e1, depends_on);
2772 if (zero_p (e1))
2774 cost = force_var_cost (data, e2, depends_on);
2775 cost += multiply_by_cost (-1, mode);
2777 return cost;
2780 cost = force_var_cost (data, e1, depends_on);
2781 cost += force_var_cost (data, e2, depends_on);
2782 cost += add_cost (mode);
2784 return cost;
2787 /* Determines the cost of the computation by which USE is expressed
2788 from induction variable CAND. If ADDRESS_P is true, we just need
2789 to create an address from it, otherwise we want to get it into
2790 a register. A set of invariants we depend on is stored in
2791 DEPENDS_ON. AT is the statement at which the value is computed. */
2793 static unsigned
2794 get_computation_cost_at (struct ivopts_data *data,
2795 struct iv_use *use, struct iv_cand *cand,
2796 bool address_p, bitmap *depends_on, tree at)
2798 tree ubase = use->iv->base, ustep = use->iv->step;
2799 tree cbase, cstep;
2800 tree utype = TREE_TYPE (ubase), ctype;
2801 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
2802 HOST_WIDE_INT ratio, aratio;
2803 bool var_present, symbol_present;
2804 unsigned cost = 0, n_sums;
2806 *depends_on = NULL;
2808 /* Only consider real candidates. */
2809 if (!cand->iv)
2810 return INFTY;
2812 cbase = cand->iv->base;
2813 cstep = cand->iv->step;
2814 ctype = TREE_TYPE (cbase);
2816 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2818 /* We do not have enough precision to express the values of the use. */
2819 return INFTY;
2822 if (address_p)
2824 /* Do not try to express the address of an object with a computation based
2825 on the address of a different object. This may cause problems in rtl
2826 level alias analysis (that does not expect this to be happening,
2827 as this is illegal in C), and would be unlikely to be useful
2828 anyway. */
2829 if (use->iv->base_object
2830 && cand->iv->base_object
2831 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
2832 return INFTY;
2835 if (!cst_and_fits_in_hwi (ustep)
2836 || !cst_and_fits_in_hwi (cstep))
2837 return INFTY;
2839 if (TREE_CODE (ubase) == INTEGER_CST
2840 && !cst_and_fits_in_hwi (ubase))
2841 goto fallback;
2843 if (TREE_CODE (cbase) == INTEGER_CST
2844 && !cst_and_fits_in_hwi (cbase))
2845 goto fallback;
2847 ustepi = int_cst_value (ustep);
2848 cstepi = int_cst_value (cstep);
2850 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
2852 /* TODO -- add direct handling of this case. */
2853 goto fallback;
2856 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
2857 return INFTY;
2859 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2860 or ratio == 1, it is better to handle this like
2862 ubase - ratio * cbase + ratio * var
2864 (this also holds in the case ratio == -1, TODO. */
2866 if (TREE_CODE (cbase) == INTEGER_CST)
2868 offset = - ratio * int_cst_value (cbase);
2869 cost += difference_cost (data,
2870 ubase, integer_zero_node,
2871 &symbol_present, &var_present, &offset,
2872 depends_on);
2874 else if (ratio == 1)
2876 cost += difference_cost (data,
2877 ubase, cbase,
2878 &symbol_present, &var_present, &offset,
2879 depends_on);
2881 else
2883 cost += force_var_cost (data, cbase, depends_on);
2884 cost += add_cost (TYPE_MODE (ctype));
2885 cost += difference_cost (data,
2886 ubase, integer_zero_node,
2887 &symbol_present, &var_present, &offset,
2888 depends_on);
2891 /* If we are after the increment, the value of the candidate is already
2892 ahead by one increment. */
2893 if (stmt_after_increment (data->current_loop, cand, at))
2894 offset -= ratio * cstepi;
2896 /* Now the computation has the shape symbol + var1 + const + ratio * var2.
2897 (symbol/var/const parts may be omitted). If we are looking for an address,
2898 find the cost of addressing this. */
2899 if (address_p)
2900 return get_address_cost (symbol_present, var_present, offset, ratio);
2902 /* Otherwise estimate the costs for computing the expression. */
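/* For instance (hypothetical shape), sym + 16 + 2 * var2 costs one
   multiplication by 2 plus a single addition (the symbol and the constant
   combine at compile time), while sym + var1 + 16 + 2 * var2 needs a
   second addition for var1. */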
2903 aratio = ratio > 0 ? ratio : -ratio;
2904 if (!symbol_present && !var_present && !offset)
2906 if (ratio != 1)
2907 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
2909 return cost;
2912 if (aratio != 1)
2913 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
2915 n_sums = 1;
2916 if (var_present
2917 /* Symbol + offset should be compile-time computable. */
2918 && (symbol_present || offset))
2919 n_sums++;
2921 return cost + n_sums * add_cost (TYPE_MODE (ctype));
2923 fallback:
2925 /* Just get the expression, expand it and measure the cost. */
2926 tree comp = get_computation_at (data->current_loop, use, cand, at);
2928 if (!comp)
2929 return INFTY;
2931 if (address_p)
2932 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
2934 return computation_cost (comp);
2938 /* Determines the cost of the computation by which USE is expressed
2939 from induction variable CAND. If ADDRESS_P is true, we just need
2940 to create an address from it, otherwise we want to get it into
2941 a register. A set of invariants we depend on is stored in
2942 DEPENDS_ON. */
2944 static unsigned
2945 get_computation_cost (struct ivopts_data *data,
2946 struct iv_use *use, struct iv_cand *cand,
2947 bool address_p, bitmap *depends_on)
2949 return get_computation_cost_at (data,
2950 use, cand, address_p, depends_on, use->stmt);
2953 /* Determines cost of basing replacement of USE on CAND in a generic
2954 expression. */
2956 static void
2957 determine_use_iv_cost_generic (struct ivopts_data *data,
2958 struct iv_use *use, struct iv_cand *cand)
2960 bitmap depends_on;
2961 unsigned cost = get_computation_cost (data, use, cand, false, &depends_on);
2963 set_use_iv_cost (data, use, cand, cost, depends_on);
2966 /* Determines cost of basing replacement of USE on CAND in an address. */
2968 static void
2969 determine_use_iv_cost_address (struct ivopts_data *data,
2970 struct iv_use *use, struct iv_cand *cand)
2972 bitmap depends_on;
2973 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
2975 set_use_iv_cost (data, use, cand, cost, depends_on);
2978 /* Computes value of induction variable IV in iteration NITER. */
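/* E.g., for an iv with base 10 and step 4, the value in iteration 3 is
   10 + 4 * 3 = 22. */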
2980 static tree
2981 iv_value (struct iv *iv, tree niter)
2983 tree val;
2984 tree type = TREE_TYPE (iv->base);
2986 niter = fold_convert (type, niter);
2987 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
2989 return fold (build2 (PLUS_EXPR, type, iv->base, val));
2992 /* Computes value of candidate CAND at position AT in iteration NITER. */
2994 static tree
2995 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
2997 tree val = iv_value (cand->iv, niter);
2998 tree type = TREE_TYPE (cand->iv->base);
3000 if (stmt_after_increment (loop, cand, at))
3001 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3003 return val;
3006 /* Checks whether it is possible to express the condition in USE by a comparison
3007 of candidate CAND. If so, stores the comparison code in *COMPARE and the
3008 value to compare against in *BOUND. */
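/* For instance (hypothetical loop), if a candidate p advances by 4 each
   iteration in

     for (i = 0; i < n; i++)
       ...

   the exit test on i may be replaced by comparing p against its value
   after the computed number of iterations, roughly p_0 + 4 * n, provided
   the recomputed iteration count matches the original one. */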
3010 static bool
3011 may_eliminate_iv (struct loop *loop,
3012 struct iv_use *use, struct iv_cand *cand,
3013 enum tree_code *compare, tree *bound)
3015 basic_block ex_bb;
3016 edge exit;
3017 struct tree_niter_desc niter, new_niter;
3018 tree wider_type, type, base;
3020 /* For now this works only for exits that dominate the loop latch. TODO -- extend
3021 it to other conditions inside the loop body. */
3022 ex_bb = bb_for_stmt (use->stmt);
3023 if (use->stmt != last_stmt (ex_bb)
3024 || TREE_CODE (use->stmt) != COND_EXPR)
3025 return false;
3026 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3027 return false;
3029 exit = EDGE_SUCC (ex_bb, 0);
3030 if (flow_bb_inside_loop_p (loop, exit->dest))
3031 exit = EDGE_SUCC (ex_bb, 1);
3032 if (flow_bb_inside_loop_p (loop, exit->dest))
3033 return false;
3035 niter.niter = NULL_TREE;
3036 number_of_iterations_exit (loop, exit, &niter);
3037 if (!niter.niter
3038 || !integer_nonzerop (niter.assumptions)
3039 || !integer_zerop (niter.may_be_zero))
3040 return false;
3042 if (exit->flags & EDGE_TRUE_VALUE)
3043 *compare = EQ_EXPR;
3044 else
3045 *compare = NE_EXPR;
3047 *bound = cand_value_at (loop, cand, use->stmt, niter.niter);
3049 /* Check that there is no problem with overflows, by verifying that
3050 the number of iterations is unchanged. */
3051 base = cand->iv->base;
3052 type = TREE_TYPE (base);
3053 if (stmt_after_increment (loop, cand, use->stmt))
3054 base = fold (build2 (PLUS_EXPR, type, base, cand->iv->step));
3056 new_niter.niter = NULL_TREE;
3057 number_of_iterations_cond (TREE_TYPE (cand->iv->base), base,
3058 cand->iv->step, NE_EXPR, *bound, NULL_TREE,
3059 &new_niter);
3060 if (!new_niter.niter
3061 || !integer_nonzerop (new_niter.assumptions)
3062 || !integer_zerop (new_niter.may_be_zero))
3063 return false;
3065 wider_type = TREE_TYPE (new_niter.niter);
3066 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (TREE_TYPE (niter.niter)))
3067 wider_type = TREE_TYPE (niter.niter);
3068 if (!operand_equal_p (fold_convert (wider_type, niter.niter),
3069 fold_convert (wider_type, new_niter.niter), 0))
3070 return false;
3072 return true;
3075 /* Determines cost of basing replacement of USE on CAND in a condition. */
3077 static void
3078 determine_use_iv_cost_condition (struct ivopts_data *data,
3079 struct iv_use *use, struct iv_cand *cand)
3081 tree bound;
3082 enum tree_code compare;
3084 /* Only consider real candidates. */
3085 if (!cand->iv)
3087 set_use_iv_cost (data, use, cand, INFTY, NULL);
3088 return;
3091 if (may_eliminate_iv (data->current_loop, use, cand, &compare, &bound))
3093 bitmap depends_on = NULL;
3094 unsigned cost = force_var_cost (data, bound, &depends_on);
3096 set_use_iv_cost (data, use, cand, cost, depends_on);
3097 return;
3100 /* The induction variable elimination failed; just express the original
3101 giv. If it is compared with an invariant, note that we cannot get
3102 rid of it. */
3103 if (TREE_CODE (*use->op_p) == SSA_NAME)
3104 record_invariant (data, *use->op_p, true);
3105 else
3107 record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
3108 record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
3111 determine_use_iv_cost_generic (data, use, cand);
3114 /* Checks whether it is possible to replace the final value of USE by
3115 a direct computation. If so, the formula is stored to *VALUE. */
3117 static bool
3118 may_replace_final_value (struct loop *loop, struct iv_use *use, tree *value)
3120 edge exit;
3121 struct tree_niter_desc *niter;
3123 exit = single_dom_exit (loop);
3124 if (!exit)
3125 return false;
3127 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3128 bb_for_stmt (use->stmt)));
3130 niter = &loop_data (loop)->niter;
3131 if (!niter->niter
3132 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
3133 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
3134 return false;
3136 *value = iv_value (use->iv, niter->niter);
3138 return true;
3141 /* Determines cost of replacing final value of USE using CAND. */
3143 static void
3144 determine_use_iv_cost_outer (struct ivopts_data *data,
3145 struct iv_use *use, struct iv_cand *cand)
3147 bitmap depends_on;
3148 unsigned cost;
3149 edge exit;
3150 tree value;
3151 struct loop *loop = data->current_loop;
3153 if (!cand->iv)
3155 if (!may_replace_final_value (loop, use, &value))
3157 set_use_iv_cost (data, use, cand, INFTY, NULL);
3158 return;
3161 depends_on = NULL;
3162 cost = force_var_cost (data, value, &depends_on);
3164 cost /= AVG_LOOP_NITER (loop);
3166 set_use_iv_cost (data, use, cand, cost, depends_on);
3167 return;
3170 exit = single_dom_exit (loop);
3171 if (exit)
3173 /* If there is just a single exit, we may use the value of the candidate
3174 after the exit is taken to determine the value of the use. */
3175 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3176 last_stmt (exit->src));
3177 if (cost != INFTY)
3178 cost /= AVG_LOOP_NITER (loop);
3180 else
3182 /* Otherwise we just need to compute the iv. */
3183 cost = get_computation_cost (data, use, cand, false, &depends_on);
3186 set_use_iv_cost (data, use, cand, cost, depends_on);
3189 /* Determines cost of basing replacement of USE on CAND. */
3191 static void
3192 determine_use_iv_cost (struct ivopts_data *data,
3193 struct iv_use *use, struct iv_cand *cand)
3195 switch (use->type)
3197 case USE_NONLINEAR_EXPR:
3198 determine_use_iv_cost_generic (data, use, cand);
3199 break;
3201 case USE_OUTER:
3202 determine_use_iv_cost_outer (data, use, cand);
3203 break;
3205 case USE_ADDRESS:
3206 determine_use_iv_cost_address (data, use, cand);
3207 break;
3209 case USE_COMPARE:
3210 determine_use_iv_cost_condition (data, use, cand);
3211 break;
3213 default:
3214 gcc_unreachable ();
3218 /* Determines costs of basing the use of the iv on an iv candidate. */
3220 static void
3221 determine_use_iv_costs (struct ivopts_data *data)
3223 unsigned i, j;
3224 struct iv_use *use;
3225 struct iv_cand *cand;
3227 data->consider_all_candidates = (n_iv_cands (data)
3228 <= CONSIDER_ALL_CANDIDATES_BOUND);
3230 alloc_use_cost_map (data);
3232 if (!data->consider_all_candidates)
3234 /* Add the important candidate entries. */
3235 for (j = 0; j < n_iv_cands (data); j++)
3237 cand = iv_cand (data, j);
3238 if (!cand->important)
3239 continue;
3240 for (i = 0; i < n_iv_uses (data); i++)
3242 use = iv_use (data, i);
3243 determine_use_iv_cost (data, use, cand);
3248 for (i = 0; i < n_iv_uses (data); i++)
3250 use = iv_use (data, i);
3252 if (data->consider_all_candidates)
3254 for (j = 0; j < n_iv_cands (data); j++)
3256 cand = iv_cand (data, j);
3257 determine_use_iv_cost (data, use, cand);
3260 else
3262 bitmap_iterator bi;
3264 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3266 cand = iv_cand (data, j);
3267 if (!cand->important)
3268 determine_use_iv_cost (data, use, cand);
3273 if (dump_file && (dump_flags & TDF_DETAILS))
3275 fprintf (dump_file, "Use-candidate costs:\n");
3277 for (i = 0; i < n_iv_uses (data); i++)
3279 use = iv_use (data, i);
3281 fprintf (dump_file, "Use %d:\n", i);
3282 fprintf (dump_file, " cand\tcost\tdepends on\n");
3283 for (j = 0; j < use->n_map_members; j++)
3285 if (!use->cost_map[j].cand
3286 || use->cost_map[j].cost == INFTY)
3287 continue;
3289 fprintf (dump_file, " %d\t%d\t",
3290 use->cost_map[j].cand->id,
3291 use->cost_map[j].cost);
3292 if (use->cost_map[j].depends_on)
3293 bitmap_print (dump_file,
3294 use->cost_map[j].depends_on, "", "");
3295 fprintf (dump_file, "\n");
3298 fprintf (dump_file, "\n");
3300 fprintf (dump_file, "\n");
3304 /* Determines cost of the candidate CAND. */
3306 static void
3307 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3309 unsigned cost_base, cost_step;
3310 tree base, last;
3311 basic_block bb;
3313 if (!cand->iv)
3315 cand->cost = 0;
3316 return;
3319 /* There are two costs associated with the candidate -- its increment
3320 and its initialization. The latter is almost negligible for any loop
3321 that rolls enough, so we give it only a very small weight. */
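/* For example (hypothetical costs), with cost_step = 1, cost_base = 5 and
   an assumed average of 5 iterations, the candidate costs 1 + 5 / 5 = 2. */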
3323 base = cand->iv->base;
3324 cost_base = force_var_cost (data, base, NULL);
3325 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3327 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3329 /* Prefer the original iv unless we may gain something by replacing it. */
3330 if (cand->pos == IP_ORIGINAL)
3331 cand->cost--;
3333 /* Prefer not to insert statements into latch unless there are some
3334 already (so that we do not create unnecessary jumps). */
3335 if (cand->pos == IP_END)
3337 bb = ip_end_pos (data->current_loop);
3338 last = last_stmt (bb);
3340 if (!last
3341 || TREE_CODE (last) == LABEL_EXPR)
3342 cand->cost++;
3346 /* Determines costs of computation of the candidates. */
3348 static void
3349 determine_iv_costs (struct ivopts_data *data)
3351 unsigned i;
3353 if (dump_file && (dump_flags & TDF_DETAILS))
3355 fprintf (dump_file, "Candidate costs:\n");
3356 fprintf (dump_file, " cand\tcost\n");
3359 for (i = 0; i < n_iv_cands (data); i++)
3361 struct iv_cand *cand = iv_cand (data, i);
3363 determine_iv_cost (data, cand);
3365 if (dump_file && (dump_flags & TDF_DETAILS))
3366 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3369 if (dump_file && (dump_flags & TDF_DETAILS))
3370 fprintf (dump_file, "\n");
3373 /* Calculates cost for having SIZE induction variables. */
3375 static unsigned
3376 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3378 return global_cost_for_size (size,
3379 loop_data (data->current_loop)->regs_used,
3380 n_iv_uses (data));
3383 /* For each size of the induction variable set determine the penalty. */
3385 static void
3386 determine_set_costs (struct ivopts_data *data)
3388 unsigned j, n;
3389 tree phi, op;
3390 struct loop *loop = data->current_loop;
3391 bitmap_iterator bi;
3393 /* We use the following model (definitely improvable, especially the
3394 cost function -- TODO):
3396 We estimate the number of registers available (using MD data), name it A.
3398 We estimate the number of registers used by the loop, name it U. This
3399 number is obtained as the number of loop phi nodes (not counting virtual
3400 registers and bivs) + the number of variables from outside of the loop.
3402 We set a reserve R (free regs that are used for temporary computations,
3403 etc.). For now the reserve is a constant 3.
3405 Let I be the number of induction variables.
3407 -- if U + I + R <= A, the cost is I * SMALL_COST (just so as not to encourage
3408 creating a lot of ivs without a reason).
3409 -- if A - R < U + I <= A, the cost is I * PRES_COST
3410 -- if U + I > A, the cost is I * PRES_COST and
3411 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
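/* A worked example with hypothetical numbers A = 6, R = 3, U = 2:
   one iv fits within the reserve (2 + 1 + 3 <= 6) and costs SMALL_COST;
   two ivs cost 2 * PRES_COST (3 < 4 <= 6); five ivs exceed the available
   registers (7 > 6) and additionally pay the spill term
   uses * SPILL_COST * 1 / 7. */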
3413 if (dump_file && (dump_flags & TDF_DETAILS))
3415 fprintf (dump_file, "Global costs:\n");
3416 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3417 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3418 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3419 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3422 n = 0;
3423 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
3425 op = PHI_RESULT (phi);
3427 if (!is_gimple_reg (op))
3428 continue;
3430 if (get_iv (data, op))
3431 continue;
3433 n++;
3436 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3438 struct version_info *info = ver_info (data, j);
3440 if (info->inv_id && info->has_nonlin_use)
3441 n++;
3444 loop_data (loop)->regs_used = n;
3445 if (dump_file && (dump_flags & TDF_DETAILS))
3446 fprintf (dump_file, " regs_used %d\n", n);
3448 if (dump_file && (dump_flags & TDF_DETAILS))
3450 fprintf (dump_file, " cost for size:\n");
3451 fprintf (dump_file, " ivs\tcost\n");
3452 for (j = 0; j <= 2 * target_avail_regs; j++)
3453 fprintf (dump_file, " %d\t%d\n", j,
3454 ivopts_global_cost_for_size (data, j));
3455 fprintf (dump_file, "\n");
3459 /* Finds the best candidate for USE and stores it in *CAND. The candidates are
3460 taken from the set SOL and they may depend on invariants in the set INV.
3461 The candidate and invariants actually used are recorded in USED_IVS and
3462 USED_INV. */
3464 static unsigned
3465 find_best_candidate (struct ivopts_data *data,
3466 struct iv_use *use, bitmap sol, bitmap inv,
3467 bitmap used_ivs, bitmap used_inv, struct iv_cand **cand)
3469 unsigned c, d;
3470 unsigned best_cost = INFTY, cost;
3471 struct iv_cand *cnd = NULL, *acnd;
3472 bitmap depends_on = NULL, asol;
3473 bitmap_iterator bi, bi1;
3475 if (data->consider_all_candidates)
3476 asol = sol;
3477 else
3479 asol = BITMAP_XMALLOC ();
3481 bitmap_a_or_b (asol, data->important_candidates, use->related_cands);
3482 bitmap_a_and_b (asol, asol, sol);
3485 EXECUTE_IF_SET_IN_BITMAP (asol, 0, c, bi)
3487 acnd = iv_cand (data, c);
3488 cost = get_use_iv_cost (data, use, acnd, &depends_on);
3490 if (cost == INFTY)
3491 continue;
3492 if (cost > best_cost)
3493 continue;
3494 if (cost == best_cost)
3496 /* Prefer the cheaper iv. */
3497 if (acnd->cost >= cnd->cost)
3498 continue;
3501 if (depends_on)
3503 EXECUTE_IF_AND_COMPL_IN_BITMAP (depends_on, inv, 0, d, bi1)
3505 goto next_cand;
3507 if (used_inv)
3508 bitmap_a_or_b (used_inv, used_inv, depends_on);
3511 cnd = acnd;
3512 best_cost = cost;
3514 next_cand: ;
3517 if (cnd && used_ivs)
3518 bitmap_set_bit (used_ivs, cnd->id);
3520 if (cand)
3521 *cand = cnd;
3523 if (!data->consider_all_candidates)
3524 BITMAP_XFREE (asol);
3526 return best_cost;
3529 /* Returns cost of set of ivs SOL + invariants INV. Removes unnecessary
3530 induction variable candidates and invariants from the sets. Only
3531 uses 0 .. MAX_USE - 1 are taken into account. */
3533 static unsigned
3534 set_cost_up_to (struct ivopts_data *data, bitmap sol, bitmap inv,
3535 unsigned max_use)
3537 unsigned i;
3538 unsigned cost = 0, size = 0, acost;
3539 struct iv_use *use;
3540 struct iv_cand *cand;
3541 bitmap used_ivs = BITMAP_XMALLOC (), used_inv = BITMAP_XMALLOC ();
3542 bitmap_iterator bi;
3544 for (i = 0; i < max_use; i++)
3546 use = iv_use (data, i);
3547 acost = find_best_candidate (data, use, sol, inv,
3548 used_ivs, used_inv, NULL);
3549 if (acost == INFTY)
3551 BITMAP_XFREE (used_ivs);
3552 BITMAP_XFREE (used_inv);
3553 return INFTY;
3555 cost += acost;
3558 EXECUTE_IF_SET_IN_BITMAP (used_ivs, 0, i, bi)
3560 cand = iv_cand (data, i);
3562 /* Do not count the pseudocandidates. */
3563 if (cand->iv)
3564 size++;
3566 cost += cand->cost;
3568 EXECUTE_IF_SET_IN_BITMAP (used_inv, 0, i, bi)
3570 size++;
3572 cost += ivopts_global_cost_for_size (data, size);
3574 bitmap_copy (sol, used_ivs);
3575 bitmap_copy (inv, used_inv);
3577 BITMAP_XFREE (used_ivs);
3578 BITMAP_XFREE (used_inv);
3580 return cost;
3583 /* Computes cost of set of ivs SOL + invariants INV. Removes unnecessary
3584 induction variable candidates and invariants from the sets. */
3586 static unsigned
3587 set_cost (struct ivopts_data *data, bitmap sol, bitmap inv)
3589 return set_cost_up_to (data, sol, inv, n_iv_uses (data));
3592 /* Tries to extend the sets IVS and INV in the best possible way in order
3593 to express the USE. */
3595 static bool
3596 try_add_cand_for (struct ivopts_data *data, bitmap ivs, bitmap inv,
3597 struct iv_use *use)
3599 unsigned best_cost = set_cost_up_to (data, ivs, inv, use->id + 1), act_cost;
3600 bitmap best_ivs = BITMAP_XMALLOC ();
3601 bitmap best_inv = BITMAP_XMALLOC ();
3602 bitmap act_ivs = BITMAP_XMALLOC ();
3603 bitmap act_inv = BITMAP_XMALLOC ();
3604 unsigned i;
3605 struct cost_pair *cp;
3606 bitmap_iterator bi;
3607 struct iv_cand *cand;
3608 bitmap depends_on;
3610 bitmap_copy (best_ivs, ivs);
3611 bitmap_copy (best_inv, inv);
3613 /* First try important candidates. Only if it fails, try the specific ones.
3614 Rationale -- in loops with many variables the best choice often is to use
3615 just one generic biv. If we added here many ivs specific to the uses,
3616 the optimization algorithm later would be likely to get stuck in a local
3617 minimum, thus causing us to create too many ivs. Going from
3618 a few ivs to more seems more likely to succeed -- starting from few
3619 ivs, replacing an expensive use by a specific iv should always be a
3620 win. */
3621 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
3623 cand = iv_cand (data, i);
3625 if (get_use_iv_cost (data, use, cand, &depends_on) == INFTY)
3626 continue;
3628 bitmap_copy (act_ivs, ivs);
3629 bitmap_set_bit (act_ivs, cand->id);
3630 if (depends_on)
3631 bitmap_a_or_b (act_inv, inv, depends_on);
3632 else
3633 bitmap_copy (act_inv, inv);
3634 act_cost = set_cost_up_to (data, act_ivs, act_inv, use->id + 1);
3636 if (act_cost < best_cost)
3638 best_cost = act_cost;
3639 bitmap_copy (best_ivs, act_ivs);
3640 bitmap_copy (best_inv, act_inv);
3644 if (best_cost == INFTY)
3646 for (i = 0; i < use->n_map_members; i++)
3648 cp = use->cost_map + i;
3649 if (cp->cost == INFTY)
3650 continue;
3652 /* Already tried this. */
3653 if (cp->cand->important)
3654 continue;
3656 bitmap_copy (act_ivs, ivs);
3657 bitmap_set_bit (act_ivs, cp->cand->id);
3658 if (cp->depends_on)
3659 bitmap_a_or_b (act_inv, inv, cp->depends_on);
3660 else
3661 bitmap_copy (act_inv, inv);
3662 act_cost = set_cost_up_to (data, act_ivs, act_inv, use->id + 1);
3664 if (act_cost < best_cost)
3666 best_cost = act_cost;
3667 bitmap_copy (best_ivs, act_ivs);
3668 bitmap_copy (best_inv, act_inv);
3673 bitmap_copy (ivs, best_ivs);
3674 bitmap_copy (inv, best_inv);
3676 BITMAP_XFREE (best_ivs);
3677 BITMAP_XFREE (best_inv);
3678 BITMAP_XFREE (act_ivs);
3679 BITMAP_XFREE (act_inv);
3681 return (best_cost != INFTY);
3684 /* Finds an initial set of IVS and invariants INV. We do this by simply
3685 choosing the best candidate for each use. */
3687 static unsigned
3688 get_initial_solution (struct ivopts_data *data, bitmap ivs, bitmap inv)
3690 unsigned i;
3692 for (i = 0; i < n_iv_uses (data); i++)
3693 if (!try_add_cand_for (data, ivs, inv, iv_use (data, i)))
3694 return INFTY;
3696 return set_cost (data, ivs, inv);
3699 /* Tries to improve set of induction variables IVS and invariants INV to get
3700 it better than COST. */
3702 static bool
3703 try_improve_iv_set (struct ivopts_data *data,
3704 bitmap ivs, bitmap inv, unsigned *cost)
3706 unsigned i, acost;
3707 bitmap new_ivs = BITMAP_XMALLOC (), new_inv = BITMAP_XMALLOC ();
3708 bitmap best_new_ivs = NULL, best_new_inv = NULL;
3710 /* Try altering the set of induction variables by one. */
3711 for (i = 0; i < n_iv_cands (data); i++)
3713 bitmap_copy (new_ivs, ivs);
3714 bitmap_copy (new_inv, inv);
3716 if (bitmap_bit_p (ivs, i))
3717 bitmap_clear_bit (new_ivs, i);
3718 else
3719 bitmap_set_bit (new_ivs, i);
3721 acost = set_cost (data, new_ivs, new_inv);
3722 if (acost >= *cost)
3723 continue;
3725 if (!best_new_ivs)
3727 best_new_ivs = BITMAP_XMALLOC ();
3728 best_new_inv = BITMAP_XMALLOC ();
3731 *cost = acost;
3732 bitmap_copy (best_new_ivs, new_ivs);
3733 bitmap_copy (best_new_inv, new_inv);
3736 /* Ditto for invariants. */
3737 for (i = 1; i <= data->max_inv_id; i++)
3739 if (ver_info (data, i)->has_nonlin_use)
3740 continue;
3742 bitmap_copy (new_ivs, ivs);
3743 bitmap_copy (new_inv, inv);
3745 if (bitmap_bit_p (inv, i))
3746 bitmap_clear_bit (new_inv, i);
3747 else
3748 bitmap_set_bit (new_inv, i);
3750 acost = set_cost (data, new_ivs, new_inv);
3751 if (acost >= *cost)
3752 continue;
3754 if (!best_new_ivs)
3756 best_new_ivs = BITMAP_XMALLOC ();
3757 best_new_inv = BITMAP_XMALLOC ();
3760 *cost = acost;
3761 bitmap_copy (best_new_ivs, new_ivs);
3762 bitmap_copy (best_new_inv, new_inv);
3765 BITMAP_XFREE (new_ivs);
3766 BITMAP_XFREE (new_inv);
3768 if (!best_new_ivs)
3769 return false;
3771 bitmap_copy (ivs, best_new_ivs);
3772 bitmap_copy (inv, best_new_inv);
3773 BITMAP_XFREE (best_new_ivs);
3774 BITMAP_XFREE (best_new_inv);
3775 return true;
3778 /* Attempts to find the optimal set of induction variables. We use a simple
3779 greedy heuristic -- we try to replace at most one candidate in the selected
3780 solution and remove the unused ivs while this improves the cost. */
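/* In other words, each pass of try_improve_iv_set toggles one candidate or
   one invariant at a time and commits the single change that lowers the
   cost the most; the passes repeat until no such change exists. */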
3782 static bitmap
3783 find_optimal_iv_set (struct ivopts_data *data)
3785 unsigned cost, i;
3786 bitmap set = BITMAP_XMALLOC ();
3787 bitmap inv = BITMAP_XMALLOC ();
3788 struct iv_use *use;
3790 data->important_candidates = BITMAP_XMALLOC ();
3791 for (i = 0; i < n_iv_cands (data); i++)
3793 struct iv_cand *cand = iv_cand (data, i);
3795 if (cand->important)
3796 bitmap_set_bit (data->important_candidates, i);
3799 /* Set the upper bound. */
3800 cost = get_initial_solution (data, set, inv);
3801 if (cost == INFTY)
3803 if (dump_file && (dump_flags & TDF_DETAILS))
3804 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
3805 BITMAP_XFREE (inv);
3806 BITMAP_XFREE (set);
3807 return NULL;
3810 if (dump_file && (dump_flags & TDF_DETAILS))
3812 fprintf (dump_file, "Initial set of candidates (cost %d): ", cost);
3813 bitmap_print (dump_file, set, "", "");
3814 fprintf (dump_file, " invariants ");
3815 bitmap_print (dump_file, inv, "", "");
3816 fprintf (dump_file, "\n");
3819 while (try_improve_iv_set (data, set, inv, &cost))
3821 if (dump_file && (dump_flags & TDF_DETAILS))
3823 fprintf (dump_file, "Improved to (cost %d): ", cost);
3824 bitmap_print (dump_file, set, "", "");
3825 fprintf (dump_file, " invariants ");
3826 bitmap_print (dump_file, inv, "", "");
3827 fprintf (dump_file, "\n");
3831 if (dump_file && (dump_flags & TDF_DETAILS))
3832 fprintf (dump_file, "Final cost %d\n\n", cost);
3834 for (i = 0; i < n_iv_uses (data); i++)
3836 use = iv_use (data, i);
3837 find_best_candidate (data, use, set, inv, NULL, NULL, &use->selected);
3840 BITMAP_XFREE (inv);
3841 BITMAP_XFREE (data->important_candidates);
3843 return set;
3846 /* Creates a new induction variable corresponding to CAND. */
3848 static void
3849 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
3851 block_stmt_iterator incr_pos;
3852 tree base;
3853 bool after = false;
3855 if (!cand->iv)
3856 return;
3858 switch (cand->pos)
3860 case IP_NORMAL:
3861 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
3862 break;
3864 case IP_END:
3865 incr_pos = bsi_last (ip_end_pos (data->current_loop));
3866 after = true;
3867 break;
3869 case IP_ORIGINAL:
3870 /* Mark that the iv is preserved. */
3871 name_info (data, cand->var_before)->preserve_biv = true;
3872 name_info (data, cand->var_after)->preserve_biv = true;
3874 /* Rewrite the increment so that it uses var_before directly. */
3875 find_interesting_uses_op (data, cand->var_after)->selected = cand;
3877 return;
3880 gimple_add_tmp_var (cand->var_before);
3881 add_referenced_tmp_var (cand->var_before);
3883 base = unshare_expr (cand->iv->base);
3885 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
3886 &incr_pos, after, &cand->var_before, &cand->var_after);
3889 /* Creates new induction variables described in SET. */
3891 static void
3892 create_new_ivs (struct ivopts_data *data, bitmap set)
3894 unsigned i;
3895 struct iv_cand *cand;
3896 bitmap_iterator bi;
3898 EXECUTE_IF_SET_IN_BITMAP (set, 0, i, bi)
3900 cand = iv_cand (data, i);
3901 create_new_iv (data, cand);
3905 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
3906 is true, also remove the ssa name defined by the statement. */
3908 static void
3909 remove_statement (tree stmt, bool including_defined_name)
3911 if (TREE_CODE (stmt) == PHI_NODE)
3913 if (!including_defined_name)
3915 /* Prevent the ssa name defined by the statement from being removed. */
3916 SET_PHI_RESULT (stmt, NULL);
3918 remove_phi_node (stmt, NULL_TREE, bb_for_stmt (stmt));
3920 else
3922 block_stmt_iterator bsi = bsi_for_stmt (stmt);
3924 bsi_remove (&bsi);
3928 /* Rewrites USE (definition of iv used in a nonlinear expression)
3929 using candidate CAND. */
3931 static void
3932 rewrite_use_nonlinear_expr (struct ivopts_data *data,
3933 struct iv_use *use, struct iv_cand *cand)
3935 tree comp = unshare_expr (get_computation (data->current_loop,
3936 use, cand));
3937 tree op, stmts, tgt, ass;
3938 block_stmt_iterator bsi, pbsi;
3940 switch (TREE_CODE (use->stmt))
3942 case PHI_NODE:
3943 tgt = PHI_RESULT (use->stmt);
3945 /* If we should keep the biv, do not replace it. */
3946 if (name_info (data, tgt)->preserve_biv)
3947 return;
3949 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
3950 while (!bsi_end_p (pbsi)
3951 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
3953 bsi = pbsi;
3954 bsi_next (&pbsi);
3956 break;
3958 case MODIFY_EXPR:
3959 tgt = TREE_OPERAND (use->stmt, 0);
3960 bsi = bsi_for_stmt (use->stmt);
3961 break;
3963 default:
3964 gcc_unreachable ();
3967 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
3969 if (TREE_CODE (use->stmt) == PHI_NODE)
3971 if (stmts)
3972 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
3973 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
3974 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
3975 remove_statement (use->stmt, false);
3976 SSA_NAME_DEF_STMT (tgt) = ass;
3978 else
3980 if (stmts)
3981 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3982 TREE_OPERAND (use->stmt, 1) = op;
3986 /* Replaces ssa name in index IDX by its basic variable. Callback for
3987 for_each_index. */
3989 static bool
3990 idx_remove_ssa_names (tree base, tree *idx,
3991 void *data ATTRIBUTE_UNUSED)
3993 tree *op;
3995 if (TREE_CODE (*idx) == SSA_NAME)
3996 *idx = SSA_NAME_VAR (*idx);
3998 if (TREE_CODE (base) == ARRAY_REF)
4000 op = &TREE_OPERAND (base, 2);
4001 if (*op
4002 && TREE_CODE (*op) == SSA_NAME)
4003 *op = SSA_NAME_VAR (*op);
4004 op = &TREE_OPERAND (base, 3);
4005 if (*op
4006 && TREE_CODE (*op) == SSA_NAME)
4007 *op = SSA_NAME_VAR (*op);
4010 return true;
4013 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4015 static tree
4016 unshare_and_remove_ssa_names (tree ref)
4018 ref = unshare_expr (ref);
4019 for_each_index (&ref, idx_remove_ssa_names, NULL);
4021 return ref;
4024 /* Rewrites base of memory access OP with expression WITH in statement
4025 pointed to by BSI. */
4027 static void
4028 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
4030 tree bvar, var, new_var, new_name, copy, name;
4031 tree orig;
4033 var = bvar = get_base_address (*op);
4035 if (!var || TREE_CODE (with) != SSA_NAME)
4036 goto do_rewrite;
4038 gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
4039 gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
4040 if (TREE_CODE (var) == INDIRECT_REF)
4041 var = TREE_OPERAND (var, 0);
4042 if (TREE_CODE (var) == SSA_NAME)
4044 name = var;
4045 var = SSA_NAME_VAR (var);
4047 else if (DECL_P (var))
4048 name = NULL_TREE;
4049 else
4050 goto do_rewrite;
4052 if (var_ann (var)->type_mem_tag)
4053 var = var_ann (var)->type_mem_tag;
4055 /* We need to add a memory tag for the variable. But we do not want
4056 to add it to the temporary used for the computations, since this leads
4057 to problems in redundancy elimination when there are common parts
4058 in two computations referring to different arrays. So we copy
4059 the variable to a new temporary. */
4060 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
4061 if (name)
4062 new_name = duplicate_ssa_name (name, copy);
4063 else
4065 new_var = create_tmp_var (TREE_TYPE (with), "ruatmp");
4066 add_referenced_tmp_var (new_var);
4067 var_ann (new_var)->type_mem_tag = var;
4068 new_name = make_ssa_name (new_var, copy);
4070 TREE_OPERAND (copy, 0) = new_name;
4071 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
4072 with = new_name;
4074 do_rewrite:
4076 orig = NULL_TREE;
4077 gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
4078 gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
4080 if (TREE_CODE (*op) == INDIRECT_REF)
4081 orig = REF_ORIGINAL (*op);
4082 if (!orig)
4083 orig = unshare_and_remove_ssa_names (*op);
4085 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
4087 /* Record the original reference, for purposes of alias analysis. */
4088 REF_ORIGINAL (*op) = orig;
4091 /* Rewrites USE (address that is an iv) using candidate CAND. */
4093 static void
4094 rewrite_use_address (struct ivopts_data *data,
4095 struct iv_use *use, struct iv_cand *cand)
4097 tree comp = unshare_expr (get_computation (data->current_loop,
4098 use, cand));
4099 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4100 tree stmts;
4101 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
4103 if (stmts)
4104 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4106 rewrite_address_base (&bsi, use->op_p, op);
4109 /* Rewrites USE (the condition such that one of the arguments is an iv) using
4110 candidate CAND. */
4112 static void
4113 rewrite_use_compare (struct ivopts_data *data,
4114 struct iv_use *use, struct iv_cand *cand)
4116 tree comp;
4117 tree *op_p, cond, op, stmts, bound;
4118 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4119 enum tree_code compare;
4121 if (may_eliminate_iv (data->current_loop,
4122 use, cand, &compare, &bound))
4124 op = force_gimple_operand (unshare_expr (bound), &stmts,
4125 true, NULL_TREE);
4127 if (stmts)
4128 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4130 *use->op_p = build2 (compare, boolean_type_node,
4131 var_at_stmt (data->current_loop,
4132 cand, use->stmt), op);
4133 modify_stmt (use->stmt);
4134 return;
4137 /* The induction variable elimination failed; just express the original
4138 giv. */
4139 comp = unshare_expr (get_computation (data->current_loop, use, cand));
4141 cond = *use->op_p;
4142 op_p = &TREE_OPERAND (cond, 0);
4143 if (TREE_CODE (*op_p) != SSA_NAME
4144 || zero_p (get_iv (data, *op_p)->step))
4145 op_p = &TREE_OPERAND (cond, 1);
4147 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
4148 if (stmts)
4149 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4151 *op_p = op;
4154 /* Ensure that operand *OP_P may be used at the end of EXIT without
4155 violating loop closed ssa form. */
4157 static void
4158 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
4160 basic_block def_bb;
4161 struct loop *def_loop;
4162 tree phi, use;
4164 use = USE_FROM_PTR (op_p);
4165 if (TREE_CODE (use) != SSA_NAME)
4166 return;
4168 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
4169 if (!def_bb)
4170 return;
4172 def_loop = def_bb->loop_father;
4173 if (flow_bb_inside_loop_p (def_loop, exit->dest))
4174 return;
4176 /* Try finding a phi node that copies the value out of the loop. */
4177 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
4178 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
4179 break;
4181 if (!phi)
4183 /* Create such a phi node. */
4184 tree new_name = duplicate_ssa_name (use, NULL);
4186 phi = create_phi_node (new_name, exit->dest);
4187 SSA_NAME_DEF_STMT (new_name) = phi;
4188 add_phi_arg (&phi, use, exit);
4191 SET_USE (op_p, PHI_RESULT (phi));
4194 /* Ensure that operands of STMT may be used at the end of EXIT without
4195 violating loop closed ssa form. */
4197 static void
4198 protect_loop_closed_ssa_form (edge exit, tree stmt)
4200 use_optype uses;
4201 vuse_optype vuses;
4202 v_may_def_optype v_may_defs;
4203 unsigned i;
4205 get_stmt_operands (stmt);
4207 uses = STMT_USE_OPS (stmt);
4208 for (i = 0; i < NUM_USES (uses); i++)
4209 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4211 vuses = STMT_VUSE_OPS (stmt);
4212 for (i = 0; i < NUM_VUSES (vuses); i++)
4213 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
4215 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4216 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
4217 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
4220 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
4221 so that they are emitted in the correct place, and so that the loop closed
4222 ssa form is preserved. */
4224 static void
4225 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
4227 tree_stmt_iterator tsi;
4228 block_stmt_iterator bsi;
4229 tree phi, stmt, def, next;
4231 if (EDGE_COUNT (exit->dest->preds) > 1)
4232 split_loop_exit_edge (exit);
4234 if (TREE_CODE (stmts) == STATEMENT_LIST)
4236 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
4237 protect_loop_closed_ssa_form (exit, tsi_stmt (tsi));
4239 else
4240 protect_loop_closed_ssa_form (exit, stmts);
4242 /* Ensure there is a label in exit->dest, so that we can
4243 insert after it. */
4244 tree_block_label (exit->dest);
4245 bsi = bsi_after_labels (exit->dest);
4246 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4248 if (!op)
4249 return;
4251 for (phi = phi_nodes (exit->dest); phi; phi = next)
4253 next = TREE_CHAIN (phi);
4255 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
4257 def = PHI_RESULT (phi);
4258 remove_statement (phi, false);
4259 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
4260 def, op);
4261 SSA_NAME_DEF_STMT (def) = stmt;
4262 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
4267 /* Rewrites the final value of USE (that is only needed outside of the loop)
4268 using candidate CAND. */
4270 static void
4271 rewrite_use_outer (struct ivopts_data *data,
4272 struct iv_use *use, struct iv_cand *cand)
4274 edge exit;
4275 tree value, op, stmts, tgt;
4276 tree phi;
4278 switch (TREE_CODE (use->stmt))
4280 case PHI_NODE:
4281 tgt = PHI_RESULT (use->stmt);
4282 break;
4283 case MODIFY_EXPR:
4284 tgt = TREE_OPERAND (use->stmt, 0);
4285 break;
4286 default:
4287 gcc_unreachable ();
4290 exit = single_dom_exit (data->current_loop);
4292 if (exit)
4294 if (!cand->iv)
4296 bool ok = may_replace_final_value (data->current_loop, use, &value);
4297 gcc_assert (ok);
4299 else
4300 value = get_computation_at (data->current_loop,
4301 use, cand, last_stmt (exit->src));
4303 value = unshare_expr (value);
4304 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
4306 /* If we will preserve the iv anyway and we would need to perform
4307 some computation to replace the final value, do nothing. */
4308 if (stmts && name_info (data, tgt)->preserve_biv)
4309 return;
4311 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
4313 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
4315 if (USE_FROM_PTR (use_p) == tgt)
4316 SET_USE (use_p, op);
4319 if (stmts)
4320 compute_phi_arg_on_exit (exit, stmts, op);
4322 /* Enable removal of the statement. We cannot remove it directly,
4323 since we may still need the aliasing information attached to the
4324 ssa name defined by it. */
4325 name_info (data, tgt)->iv->have_use_for = false;
4326 return;
4329 /* If the variable is going to be preserved anyway, there is nothing to
4330 do. */
4331 if (name_info (data, tgt)->preserve_biv)
4332 return;
4334 /* Otherwise we just need to compute the iv. */
4335 rewrite_use_nonlinear_expr (data, use, cand);
4338 /* Rewrites USE using candidate CAND. */
4340 static void
4341 rewrite_use (struct ivopts_data *data,
4342 struct iv_use *use, struct iv_cand *cand)
4344 switch (use->type)
4346 case USE_NONLINEAR_EXPR:
4347 rewrite_use_nonlinear_expr (data, use, cand);
4348 break;
4350 case USE_OUTER:
4351 rewrite_use_outer (data, use, cand);
4352 break;
4354 case USE_ADDRESS:
4355 rewrite_use_address (data, use, cand);
4356 break;
4358 case USE_COMPARE:
4359 rewrite_use_compare (data, use, cand);
4360 break;
4362 default:
4363 gcc_unreachable ();
4365 modify_stmt (use->stmt);
4368 /* Rewrite the uses using the selected induction variables. */
4370 static void
4371 rewrite_uses (struct ivopts_data *data)
4373 unsigned i;
4374 struct iv_cand *cand;
4375 struct iv_use *use;
4377 for (i = 0; i < n_iv_uses (data); i++)
4379 use = iv_use (data, i);
4380 cand = use->selected;
4381 gcc_assert (cand);
4383 rewrite_use (data, use, cand);
4387 /* Removes the ivs that are not used after rewriting. */
4389 static void
4390 remove_unused_ivs (struct ivopts_data *data)
4392 unsigned j;
4393 bitmap_iterator bi;
4395 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4397 struct version_info *info;
4399 info = ver_info (data, j);
4400 if (info->iv
4401 && !zero_p (info->iv->step)
4402 && !info->inv_id
4403 && !info->iv->have_use_for
4404 && !info->preserve_biv)
4405 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
	free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_XFREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_XFREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VARRAY_POP_ALL (data->iv_uses);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
	free (cand->iv);
      free (cand);
    }
  VARRAY_POP_ALL (data->iv_candidates);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = xcalloc (data->version_info_size,
				    sizeof (struct version_info));
    }

  data->max_inv_id = 0;

  for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
    {
      tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);

      SET_DECL_RTL (obj, NULL_RTX);
    }
  VARRAY_POP_ALL (decl_rtl_to_reset);
}

/* Finalizes data structures used by the iv optimization pass.  LOOPS is the
   loop tree.  */

static void
tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
{
  unsigned i;

  for (i = 1; i < loops->num; i++)
    if (loops->parray[i])
      {
	free (loops->parray[i]->aux);
	loops->parray[i]->aux = NULL;
      }

  free_loop_data (data);
  free (data->version_info);
  BITMAP_XFREE (data->relevant);

  VARRAY_FREE (decl_rtl_to_reset);
  VARRAY_FREE (data->iv_uses);
  VARRAY_FREE (data->iv_candidates);
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  bitmap iv_set;
  edge exit;

  data->current_loop = loop;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  /* For each ssa name, determine whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_use_iv_costs (data);
  determine_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_set = find_optimal_iv_set (data);
  if (!iv_set)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_set);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  loop_commit_inserts ();

  BITMAP_XFREE (iv_set);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

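/* Illustrative sketch (simplified, assumed typical outcome): for a loop such
   as

	for (i = 0; i < 100; i++)
	  a[i] = 0;

   the cost model may decide that a pointer iv is cheaper than recomputing
   &a[i] in each iteration, so the rewritten loop behaves roughly like

	for (p = &a[0]; p != &a[100]; p++)
	  *p = 0;

   Whether this particular form is chosen depends entirely on the target
   costs determined above.  */
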
/* Main entry point.  Optimizes induction variables in LOOPS.  */

void
tree_ssa_iv_optimize (struct loops *loops)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (loops, &data);

  /* Optimize the loops starting with the innermost ones.  */
  loop = loops->tree_root;
  while (loop->inner)
    loop = loop->inner;

#ifdef ENABLE_CHECKING
  verify_loop_closed_ssa ();
  verify_stmts ();
#endif

  /* Scan the loops, inner ones first.  */
  while (loop != loops->tree_root)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);

      if (loop->next)
	{
	  loop = loop->next;
	  while (loop->inner)
	    loop = loop->inner;
	}
      else
	loop = loop->outer;
    }
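
  /* The walk above visits every loop only after all loops nested in it:
     for a loop tree

	root
	  1
	    2
	      3
	    4

     the processing order is 3, 2, 4, 1 -- innermost loops first, then
     their siblings, then the enclosing loops.  */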

#ifdef ENABLE_CHECKING
  verify_loop_closed_ssa ();
  verify_stmts ();
#endif

  tree_ssa_iv_optimize_finalize (loops, &data);
}