* trans-stmt.c (gfc_trans_simple_do): New function.
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blob4dae6c9b938abf40def1a8c45af65597ad8ba30e
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
92 /* The infinite cost. */
93 #define INFTY 10000000
95 /* The expected number of loop iterations. TODO -- use profiling instead of
96 this. */
97 #define AVG_LOOP_NITER(LOOP) 5
99 /* Just to shorten the ugly names. */
100 #define EXEC_BINARY nondestructive_fold_binary_to_constant
101 #define EXEC_UNARY nondestructive_fold_unary_to_constant
103 /* Representation of the induction variable. */
104 struct iv
106 tree base; /* Initial value of the iv. */
107 tree base_object; /* A memory object to that the induction variable points. */
108 tree step; /* Step of the iv (constant only). */
109 tree ssa_name; /* The ssa name with the value. */
110 bool biv_p; /* Is it a biv? */
111 bool have_use_for; /* Do we already have a use for it? */
112 unsigned use_id; /* The identifier in the use if it is the case. */
115 /* Per-ssa version information (induction variable descriptions, etc.). */
116 struct version_info
118 tree name; /* The ssa name. */
119 struct iv *iv; /* Induction variable description. */
120 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
121 an expression that is not an induction variable. */
122 unsigned inv_id; /* Id of an invariant. */
123 bool preserve_biv; /* For the original biv, whether to preserve it. */
126 /* Information attached to loop. */
127 struct loop_data
129 struct tree_niter_desc niter;
130 /* Number of iterations. */
132 unsigned regs_used; /* Number of registers used. */
135 /* Types of uses. */
136 enum use_type
138 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
139 USE_OUTER, /* The induction variable is used outside the loop. */
140 USE_ADDRESS, /* Use in an address. */
141 USE_COMPARE /* Use is a compare. */
144 /* The candidate - cost pair. */
145 struct cost_pair
147 struct iv_cand *cand; /* The candidate. */
148 unsigned cost; /* The cost. */
149 bitmap depends_on; /* The list of invariants that have to be
150 preserved. */
153 /* Use. */
154 struct iv_use
156 unsigned id; /* The id of the use. */
157 enum use_type type; /* Type of the use. */
158 struct iv *iv; /* The induction variable it is based on. */
159 tree stmt; /* Statement in that it occurs. */
160 tree *op_p; /* The place where it occurs. */
161 bitmap related_cands; /* The set of "related" iv candidates. */
163 unsigned n_map_members; /* Number of candidates in the cost_map list. */
164 struct cost_pair *cost_map;
165 /* The costs wrto the iv candidates. */
167 struct iv_cand *selected;
168 /* The selected candidate. */
171 /* The position where the iv is computed. */
172 enum iv_position
174 IP_NORMAL, /* At the end, just before the exit condition. */
175 IP_END, /* At the end of the latch block. */
176 IP_ORIGINAL /* The original biv. */
179 /* The induction variable candidate. */
180 struct iv_cand
182 unsigned id; /* The number of the candidate. */
183 bool important; /* Whether this is an "important" candidate, i.e. such
184 that it should be considered by all uses. */
185 enum iv_position pos; /* Where it is computed. */
186 tree incremented_at; /* For original biv, the statement where it is
187 incremented. */
188 tree var_before; /* The variable used for it before increment. */
189 tree var_after; /* The variable used for it after increment. */
190 struct iv *iv; /* The value of the candidate. NULL for
191 "pseudocandidate" used to indicate the possibility
192 to replace the final value of an iv by direct
193 computation of the value. */
194 unsigned cost; /* Cost of the candidate. */
197 /* The data used by the induction variable optimizations. */
199 struct ivopts_data
201 /* The currently optimized loop. */
202 struct loop *current_loop;
204 /* The size of version_info array allocated. */
205 unsigned version_info_size;
207 /* The array of information for the ssa names. */
208 struct version_info *version_info;
210 /* The bitmap of indices in version_info whose value was changed. */
211 bitmap relevant;
213 /* The maximum invariant id. */
214 unsigned max_inv_id;
216 /* The uses of induction variables. */
217 varray_type iv_uses;
219 /* The candidates. */
220 varray_type iv_candidates;
222 /* Whether to consider just related and important candidates when replacing a
223 use. */
224 bool consider_all_candidates;
227 /* Bound on number of candidates below that all candidates are considered. */
229 #define CONSIDER_ALL_CANDIDATES_BOUND \
230 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
232 /* If there are more iv occurrences, we just give up (it is quite unlikely that
233 optimizing such a loop would help, and it would take ages). */
235 #define MAX_CONSIDERED_USES \
236 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
238 /* The list of trees for that the decl_rtl field must be reset is stored
239 here. */
241 static varray_type decl_rtl_to_reset;
243 /* Number of uses recorded in DATA. */
245 static inline unsigned
246 n_iv_uses (struct ivopts_data *data)
248 return VARRAY_ACTIVE_SIZE (data->iv_uses);
251 /* Ith use recorded in DATA. */
253 static inline struct iv_use *
254 iv_use (struct ivopts_data *data, unsigned i)
256 return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
259 /* Number of candidates recorded in DATA. */
261 static inline unsigned
262 n_iv_cands (struct ivopts_data *data)
264 return VARRAY_ACTIVE_SIZE (data->iv_candidates);
267 /* Ith candidate recorded in DATA. */
269 static inline struct iv_cand *
270 iv_cand (struct ivopts_data *data, unsigned i)
272 return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
275 /* The data for LOOP. */
277 static inline struct loop_data *
278 loop_data (struct loop *loop)
280 return loop->aux;
283 /* The single loop exit if it dominates the latch, NULL otherwise. */
285 static edge
286 single_dom_exit (struct loop *loop)
288 edge exit = loop->single_exit;
290 if (!exit)
291 return NULL;
293 if (!just_once_each_iteration_p (loop, exit->src))
294 return NULL;
296 return exit;
299 /* Dumps information about the induction variable IV to FILE. */
301 extern void dump_iv (FILE *, struct iv *);
302 void
303 dump_iv (FILE *file, struct iv *iv)
305 if (iv->ssa_name)
307 fprintf (file, "ssa name ");
308 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
309 fprintf (file, "\n");
312 fprintf (file, " type ");
313 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
314 fprintf (file, "\n");
316 if (iv->step)
318 fprintf (file, " base ");
319 print_generic_expr (file, iv->base, TDF_SLIM);
320 fprintf (file, "\n");
322 fprintf (file, " step ");
323 print_generic_expr (file, iv->step, TDF_SLIM);
324 fprintf (file, "\n");
326 else
328 fprintf (file, " invariant ");
329 print_generic_expr (file, iv->base, TDF_SLIM);
330 fprintf (file, "\n");
333 if (iv->base_object)
335 fprintf (file, " base object ");
336 print_generic_expr (file, iv->base_object, TDF_SLIM);
337 fprintf (file, "\n");
340 if (iv->biv_p)
341 fprintf (file, " is a biv\n");
344 /* Dumps information about the USE to FILE. */
346 extern void dump_use (FILE *, struct iv_use *);
347 void
348 dump_use (FILE *file, struct iv_use *use)
350 fprintf (file, "use %d\n", use->id);
352 switch (use->type)
354 case USE_NONLINEAR_EXPR:
355 fprintf (file, " generic\n");
356 break;
358 case USE_OUTER:
359 fprintf (file, " outside\n");
360 break;
362 case USE_ADDRESS:
363 fprintf (file, " address\n");
364 break;
366 case USE_COMPARE:
367 fprintf (file, " compare\n");
368 break;
370 default:
371 gcc_unreachable ();
374 fprintf (file, " in statement ");
375 print_generic_expr (file, use->stmt, TDF_SLIM);
376 fprintf (file, "\n");
378 fprintf (file, " at position ");
379 if (use->op_p)
380 print_generic_expr (file, *use->op_p, TDF_SLIM);
381 fprintf (file, "\n");
383 dump_iv (file, use->iv);
385 fprintf (file, " related candidates ");
386 dump_bitmap (file, use->related_cands);
389 /* Dumps information about the uses to FILE. */
391 extern void dump_uses (FILE *, struct ivopts_data *);
392 void
393 dump_uses (FILE *file, struct ivopts_data *data)
395 unsigned i;
396 struct iv_use *use;
398 for (i = 0; i < n_iv_uses (data); i++)
400 use = iv_use (data, i);
402 dump_use (file, use);
403 fprintf (file, "\n");
407 /* Dumps information about induction variable candidate CAND to FILE. */
409 extern void dump_cand (FILE *, struct iv_cand *);
410 void
411 dump_cand (FILE *file, struct iv_cand *cand)
413 struct iv *iv = cand->iv;
415 fprintf (file, "candidate %d%s\n",
416 cand->id, cand->important ? " (important)" : "");
418 if (!iv)
420 fprintf (file, " final value replacement\n");
421 return;
424 switch (cand->pos)
426 case IP_NORMAL:
427 fprintf (file, " incremented before exit test\n");
428 break;
430 case IP_END:
431 fprintf (file, " incremented at end\n");
432 break;
434 case IP_ORIGINAL:
435 fprintf (file, " original biv\n");
436 break;
439 dump_iv (file, iv);
442 /* Returns the info for ssa version VER. */
444 static inline struct version_info *
445 ver_info (struct ivopts_data *data, unsigned ver)
447 return data->version_info + ver;
450 /* Returns the info for ssa name NAME. */
452 static inline struct version_info *
453 name_info (struct ivopts_data *data, tree name)
455 return ver_info (data, SSA_NAME_VERSION (name));
458 /* Checks whether there exists number X such that X * B = A, counting modulo
459 2^BITS. */
461 static bool
462 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
463 HOST_WIDE_INT *x)
465 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
466 unsigned HOST_WIDE_INT inv, ex, val;
467 unsigned i;
469 a &= mask;
470 b &= mask;
472 /* First divide the whole equation by 2 as long as possible. */
473 while (!(a & 1) && !(b & 1))
475 a >>= 1;
476 b >>= 1;
477 bits--;
478 mask >>= 1;
481 if (!(b & 1))
483 /* If b is still even, a is odd and there is no such x. */
484 return false;
487 /* Find the inverse of b. We compute it as
488 b^(2^(bits - 1) - 1) (mod 2^bits). */
489 inv = 1;
490 ex = b;
491 for (i = 0; i < bits - 1; i++)
493 inv = (inv * ex) & mask;
494 ex = (ex * ex) & mask;
497 val = (a * inv) & mask;
499 gcc_assert (((val * b) & mask) == a);
501 if ((val >> (bits - 1)) & 1)
502 val |= ~mask;
504 *x = val;
506 return true;
509 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
510 emitted in LOOP. */
512 static bool
513 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
515 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
517 gcc_assert (bb);
519 if (sbb == loop->latch)
520 return true;
522 if (sbb != bb)
523 return false;
525 return stmt == last_stmt (bb);
528 /* Returns true if STMT if after the place where the original induction
529 variable CAND is incremented. */
531 static bool
532 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
534 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
535 basic_block stmt_bb = bb_for_stmt (stmt);
536 block_stmt_iterator bsi;
538 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
539 return false;
541 if (stmt_bb != cand_bb)
542 return true;
544 /* Scan the block from the end, since the original ivs are usually
545 incremented at the end of the loop body. */
546 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
548 if (bsi_stmt (bsi) == cand->incremented_at)
549 return false;
550 if (bsi_stmt (bsi) == stmt)
551 return true;
555 /* Returns true if STMT if after the place where the induction variable
556 CAND is incremented in LOOP. */
558 static bool
559 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
561 switch (cand->pos)
563 case IP_END:
564 return false;
566 case IP_NORMAL:
567 return stmt_after_ip_normal_pos (loop, stmt);
569 case IP_ORIGINAL:
570 return stmt_after_ip_original_pos (cand, stmt);
572 default:
573 gcc_unreachable ();
577 /* Initializes data structures used by the iv optimization pass, stored
578 in DATA. LOOPS is the loop tree. */
580 static void
581 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
583 unsigned i;
585 data->version_info_size = 2 * num_ssa_names;
586 data->version_info = xcalloc (data->version_info_size,
587 sizeof (struct version_info));
588 data->relevant = BITMAP_XMALLOC ();
589 data->max_inv_id = 0;
591 for (i = 1; i < loops->num; i++)
592 if (loops->parray[i])
593 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
595 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
596 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
597 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
600 /* Returns a memory object to that EXPR points. In case we are able to
601 determine that it does not point to any such object, NULL is returned. */
603 static tree
604 determine_base_object (tree expr)
606 enum tree_code code = TREE_CODE (expr);
607 tree base, obj, op0, op1;
609 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
610 return NULL_TREE;
612 switch (code)
614 case INTEGER_CST:
615 return NULL_TREE;
617 case ADDR_EXPR:
618 obj = TREE_OPERAND (expr, 0);
619 base = get_base_address (obj);
621 if (!base)
622 return fold_convert (ptr_type_node, expr);
624 return fold (build1 (ADDR_EXPR, ptr_type_node, base));
626 case PLUS_EXPR:
627 case MINUS_EXPR:
628 op0 = determine_base_object (TREE_OPERAND (expr, 0));
629 op1 = determine_base_object (TREE_OPERAND (expr, 1));
631 if (!op1)
632 return op0;
634 if (!op0)
635 return (code == PLUS_EXPR
636 ? op1
637 : fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));
639 return fold (build (code, ptr_type_node, op0, op1));
641 default:
642 return fold_convert (ptr_type_node, expr);
646 /* Allocates an induction variable with given initial value BASE and step STEP
647 for loop LOOP. */
649 static struct iv *
650 alloc_iv (tree base, tree step)
652 struct iv *iv = xcalloc (1, sizeof (struct iv));
654 if (step && integer_zerop (step))
655 step = NULL_TREE;
657 iv->base = base;
658 iv->base_object = determine_base_object (base);
659 iv->step = step;
660 iv->biv_p = false;
661 iv->have_use_for = false;
662 iv->use_id = 0;
663 iv->ssa_name = NULL_TREE;
665 return iv;
668 /* Sets STEP and BASE for induction variable IV. */
670 static void
671 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
673 struct version_info *info = name_info (data, iv);
675 gcc_assert (!info->iv);
677 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
678 info->iv = alloc_iv (base, step);
679 info->iv->ssa_name = iv;
682 /* Finds induction variable declaration for VAR. */
684 static struct iv *
685 get_iv (struct ivopts_data *data, tree var)
687 basic_block bb;
689 if (!name_info (data, var)->iv)
691 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
693 if (!bb
694 || !flow_bb_inside_loop_p (data->current_loop, bb))
695 set_iv (data, var, var, NULL_TREE);
698 return name_info (data, var)->iv;
701 /* Determines the step of a biv defined in PHI. */
703 static tree
704 determine_biv_step (tree phi)
706 struct loop *loop = bb_for_stmt (phi)->loop_father;
707 tree name = PHI_RESULT (phi), base, step;
708 tree type = TREE_TYPE (name);
710 if (!is_gimple_reg (name))
711 return NULL_TREE;
713 if (!simple_iv (loop, phi, name, &base, &step))
714 return NULL_TREE;
716 if (!step)
717 return build_int_cst (type, 0);
719 return step;
722 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
724 static bool
725 abnormal_ssa_name_p (tree exp)
727 if (!exp)
728 return false;
730 if (TREE_CODE (exp) != SSA_NAME)
731 return false;
733 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
736 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
737 abnormal phi node. Callback for for_each_index. */
739 static bool
740 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
741 void *data ATTRIBUTE_UNUSED)
743 if (TREE_CODE (base) == ARRAY_REF)
745 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
746 return false;
747 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
748 return false;
751 return !abnormal_ssa_name_p (*index);
754 /* Returns true if EXPR contains a ssa name that occurs in an
755 abnormal phi node. */
757 static bool
758 contains_abnormal_ssa_name_p (tree expr)
760 enum tree_code code = TREE_CODE (expr);
761 enum tree_code_class class = TREE_CODE_CLASS (code);
763 if (code == SSA_NAME)
764 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
766 if (code == INTEGER_CST
767 || is_gimple_min_invariant (expr))
768 return false;
770 if (code == ADDR_EXPR)
771 return !for_each_index (&TREE_OPERAND (expr, 1),
772 idx_contains_abnormal_ssa_name_p,
773 NULL);
775 switch (class)
777 case tcc_binary:
778 case tcc_comparison:
779 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
780 return true;
782 /* Fallthru. */
783 case tcc_unary:
784 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
785 return true;
787 break;
789 default:
790 gcc_unreachable ();
793 return false;
796 /* Finds basic ivs. */
798 static bool
799 find_bivs (struct ivopts_data *data)
801 tree phi, step, type, base;
802 bool found = false;
803 struct loop *loop = data->current_loop;
805 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
807 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
808 continue;
810 step = determine_biv_step (phi);
812 if (!step)
813 continue;
814 if (cst_and_fits_in_hwi (step)
815 && int_cst_value (step) == 0)
816 continue;
818 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
819 if (contains_abnormal_ssa_name_p (base))
820 continue;
822 type = TREE_TYPE (PHI_RESULT (phi));
823 base = fold_convert (type, base);
824 step = fold_convert (type, step);
826 /* FIXME: We do not handle induction variables whose step does
827 not satisfy cst_and_fits_in_hwi. */
828 if (!cst_and_fits_in_hwi (step))
829 continue;
831 set_iv (data, PHI_RESULT (phi), base, step);
832 found = true;
835 return found;
838 /* Marks basic ivs. */
840 static void
841 mark_bivs (struct ivopts_data *data)
843 tree phi, var;
844 struct iv *iv, *incr_iv;
845 struct loop *loop = data->current_loop;
846 basic_block incr_bb;
848 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
850 iv = get_iv (data, PHI_RESULT (phi));
851 if (!iv)
852 continue;
854 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
855 incr_iv = get_iv (data, var);
856 if (!incr_iv)
857 continue;
859 /* If the increment is in the subloop, ignore it. */
860 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
861 if (incr_bb->loop_father != data->current_loop
862 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
863 continue;
865 iv->biv_p = true;
866 incr_iv->biv_p = true;
870 /* Checks whether STMT defines a linear induction variable and stores its
871 parameters to BASE and STEP. */
873 static bool
874 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
875 tree *base, tree *step)
877 tree lhs;
878 struct loop *loop = data->current_loop;
880 *base = NULL_TREE;
881 *step = NULL_TREE;
883 if (TREE_CODE (stmt) != MODIFY_EXPR)
884 return false;
886 lhs = TREE_OPERAND (stmt, 0);
887 if (TREE_CODE (lhs) != SSA_NAME)
888 return false;
890 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
891 return false;
893 /* FIXME: We do not handle induction variables whose step does
894 not satisfy cst_and_fits_in_hwi. */
895 if (!zero_p (*step)
896 && !cst_and_fits_in_hwi (*step))
897 return false;
899 if (contains_abnormal_ssa_name_p (*base))
900 return false;
902 return true;
905 /* Finds general ivs in statement STMT. */
907 static void
908 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
910 tree base, step;
912 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
913 return;
915 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
918 /* Finds general ivs in basic block BB. */
920 static void
921 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
923 block_stmt_iterator bsi;
925 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
926 find_givs_in_stmt (data, bsi_stmt (bsi));
929 /* Finds general ivs. */
931 static void
932 find_givs (struct ivopts_data *data)
934 struct loop *loop = data->current_loop;
935 basic_block *body = get_loop_body_in_dom_order (loop);
936 unsigned i;
938 for (i = 0; i < loop->num_nodes; i++)
939 find_givs_in_bb (data, body[i]);
940 free (body);
943 /* Determine the number of iterations of the current loop. */
945 static void
946 determine_number_of_iterations (struct ivopts_data *data)
948 struct loop *loop = data->current_loop;
949 edge exit = single_dom_exit (loop);
951 if (!exit)
952 return;
954 number_of_iterations_exit (loop, exit, &loop_data (loop)->niter);
957 /* For each ssa name defined in LOOP determines whether it is an induction
958 variable and if so, its initial value and step. */
960 static bool
961 find_induction_variables (struct ivopts_data *data)
963 unsigned i;
964 struct loop *loop = data->current_loop;
965 bitmap_iterator bi;
967 if (!find_bivs (data))
968 return false;
970 find_givs (data);
971 mark_bivs (data);
972 determine_number_of_iterations (data);
974 if (dump_file && (dump_flags & TDF_DETAILS))
976 if (loop_data (loop)->niter.niter)
978 fprintf (dump_file, " number of iterations ");
979 print_generic_expr (dump_file, loop_data (loop)->niter.niter,
980 TDF_SLIM);
981 fprintf (dump_file, "\n");
983 fprintf (dump_file, " may be zero if ");
984 print_generic_expr (dump_file, loop_data (loop)->niter.may_be_zero,
985 TDF_SLIM);
986 fprintf (dump_file, "\n");
988 fprintf (dump_file, " bogus unless ");
989 print_generic_expr (dump_file, loop_data (loop)->niter.assumptions,
990 TDF_SLIM);
991 fprintf (dump_file, "\n");
992 fprintf (dump_file, "\n");
995 fprintf (dump_file, "Induction variables:\n\n");
997 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
999 if (ver_info (data, i)->iv)
1000 dump_iv (dump_file, ver_info (data, i)->iv);
1004 return true;
1007 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1009 static struct iv_use *
1010 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1011 tree stmt, enum use_type use_type)
1013 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1015 use->id = n_iv_uses (data);
1016 use->type = use_type;
1017 use->iv = iv;
1018 use->stmt = stmt;
1019 use->op_p = use_p;
1020 use->related_cands = BITMAP_XMALLOC ();
1022 /* To avoid showing ssa name in the dumps, if it was not reset by the
1023 caller. */
1024 iv->ssa_name = NULL_TREE;
1026 if (dump_file && (dump_flags & TDF_DETAILS))
1027 dump_use (dump_file, use);
1029 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
1031 return use;
1034 /* Checks whether OP is a loop-level invariant and if so, records it.
1035 NONLINEAR_USE is true if the invariant is used in a way we do not
1036 handle specially. */
1038 static void
1039 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1041 basic_block bb;
1042 struct version_info *info;
1044 if (TREE_CODE (op) != SSA_NAME
1045 || !is_gimple_reg (op))
1046 return;
1048 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1049 if (bb
1050 && flow_bb_inside_loop_p (data->current_loop, bb))
1051 return;
1053 info = name_info (data, op);
1054 info->name = op;
1055 info->has_nonlin_use |= nonlinear_use;
1056 if (!info->inv_id)
1057 info->inv_id = ++data->max_inv_id;
1058 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1061 /* Checks whether the use OP is interesting and if so, records it
1062 as TYPE. */
1064 static struct iv_use *
1065 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1066 enum use_type type)
1068 struct iv *iv;
1069 struct iv *civ;
1070 tree stmt;
1071 struct iv_use *use;
1073 if (TREE_CODE (op) != SSA_NAME)
1074 return NULL;
1076 iv = get_iv (data, op);
1077 if (!iv)
1078 return NULL;
1080 if (iv->have_use_for)
1082 use = iv_use (data, iv->use_id);
1084 gcc_assert (use->type == USE_NONLINEAR_EXPR
1085 || use->type == USE_OUTER);
1087 if (type == USE_NONLINEAR_EXPR)
1088 use->type = USE_NONLINEAR_EXPR;
1089 return use;
1092 if (zero_p (iv->step))
1094 record_invariant (data, op, true);
1095 return NULL;
1097 iv->have_use_for = true;
1099 civ = xmalloc (sizeof (struct iv));
1100 *civ = *iv;
1102 stmt = SSA_NAME_DEF_STMT (op);
1103 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1104 || TREE_CODE (stmt) == MODIFY_EXPR);
1106 use = record_use (data, NULL, civ, stmt, type);
1107 iv->use_id = use->id;
1109 return use;
1112 /* Checks whether the use OP is interesting and if so, records it. */
1114 static struct iv_use *
1115 find_interesting_uses_op (struct ivopts_data *data, tree op)
1117 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1120 /* Records a definition of induction variable OP that is used outside of the
1121 loop. */
1123 static struct iv_use *
1124 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1126 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1129 /* Checks whether the condition *COND_P in STMT is interesting
1130 and if so, records it. */
1132 static void
1133 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1135 tree *op0_p;
1136 tree *op1_p;
1137 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1138 struct iv const_iv;
1139 tree zero = integer_zero_node;
1141 const_iv.step = NULL_TREE;
1143 if (integer_zerop (*cond_p)
1144 || integer_nonzerop (*cond_p))
1145 return;
1147 if (TREE_CODE (*cond_p) == SSA_NAME)
1149 op0_p = cond_p;
1150 op1_p = &zero;
1152 else
1154 op0_p = &TREE_OPERAND (*cond_p, 0);
1155 op1_p = &TREE_OPERAND (*cond_p, 1);
1158 if (TREE_CODE (*op0_p) == SSA_NAME)
1159 iv0 = get_iv (data, *op0_p);
1160 else
1161 iv0 = &const_iv;
1163 if (TREE_CODE (*op1_p) == SSA_NAME)
1164 iv1 = get_iv (data, *op1_p);
1165 else
1166 iv1 = &const_iv;
1168 if (/* When comparing with non-invariant value, we may not do any senseful
1169 induction variable elimination. */
1170 (!iv0 || !iv1)
1171 /* Eliminating condition based on two ivs would be nontrivial.
1172 ??? TODO -- it is not really important to handle this case. */
1173 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1175 find_interesting_uses_op (data, *op0_p);
1176 find_interesting_uses_op (data, *op1_p);
1177 return;
1180 if (zero_p (iv0->step) && zero_p (iv1->step))
1182 /* If both are invariants, this is a work for unswitching. */
1183 return;
1186 civ = xmalloc (sizeof (struct iv));
1187 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1188 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1191 /* Returns true if expression EXPR is obviously invariant in LOOP,
1192 i.e. if all its operands are defined outside of the LOOP. */
1194 static bool
1195 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1197 basic_block def_bb;
1198 unsigned i, len;
1200 if (is_gimple_min_invariant (expr))
1201 return true;
1203 if (TREE_CODE (expr) == SSA_NAME)
1205 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1206 if (def_bb
1207 && flow_bb_inside_loop_p (loop, def_bb))
1208 return false;
1210 return true;
1213 if (!EXPR_P (expr))
1214 return false;
1216 len = first_rtl_op (TREE_CODE (expr));
1217 for (i = 0; i < len; i++)
1218 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1219 return false;
1221 return true;
1224 /* Cumulates the steps of indices into DATA and replaces their values with the
1225 initial ones. Returns false when the value of the index cannot be determined.
1226 Callback for for_each_index. */
1228 struct ifs_ivopts_data
1230 struct ivopts_data *ivopts_data;
1231 tree stmt;
1232 tree *step_p;
1235 static bool
1236 idx_find_step (tree base, tree *idx, void *data)
1238 struct ifs_ivopts_data *dta = data;
1239 struct iv *iv;
1240 tree step, type, iv_type, iv_step, lbound, off;
1241 struct loop *loop = dta->ivopts_data->current_loop;
1243 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1244 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1245 return false;
1247 /* If base is a component ref, require that the offset of the reference
1248 is invariant. */
1249 if (TREE_CODE (base) == COMPONENT_REF)
1251 off = component_ref_field_offset (base);
1252 return expr_invariant_in_loop_p (loop, off);
1255 /* If base is array, first check whether we will be able to move the
1256 reference out of the loop (in order to take its address in strength
1257 reduction). In order for this to work we need both lower bound
1258 and step to be loop invariants. */
1259 if (TREE_CODE (base) == ARRAY_REF)
1261 step = array_ref_element_size (base);
1262 lbound = array_ref_low_bound (base);
1264 if (!expr_invariant_in_loop_p (loop, step)
1265 || !expr_invariant_in_loop_p (loop, lbound))
1266 return false;
1269 if (TREE_CODE (*idx) != SSA_NAME)
1270 return true;
1272 iv = get_iv (dta->ivopts_data, *idx);
1273 if (!iv)
1274 return false;
1276 *idx = iv->base;
1278 if (!iv->step)
1279 return true;
1281 iv_type = TREE_TYPE (iv->base);
1282 type = build_pointer_type (TREE_TYPE (base));
1283 if (TREE_CODE (base) == ARRAY_REF)
1285 step = array_ref_element_size (base);
1287 /* We only handle addresses whose step is an integer constant. */
1288 if (TREE_CODE (step) != INTEGER_CST)
1289 return false;
1291 else
1292 /* The step for pointer arithmetics already is 1 byte. */
1293 step = build_int_cst (type, 1);
1295 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1296 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1297 type, iv->base, iv->step, dta->stmt);
1298 else
1299 iv_step = fold_convert (iv_type, iv->step);
1301 if (!iv_step)
1303 /* The index might wrap. */
1304 return false;
1307 step = EXEC_BINARY (MULT_EXPR, type, step, iv_step);
1309 if (!*dta->step_p)
1310 *dta->step_p = step;
1311 else
1312 *dta->step_p = EXEC_BINARY (PLUS_EXPR, type, *dta->step_p, step);
1314 return true;
1317 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1318 object is passed to it in DATA. */
1320 static bool
1321 idx_record_use (tree base, tree *idx,
1322 void *data)
1324 find_interesting_uses_op (data, *idx);
1325 if (TREE_CODE (base) == ARRAY_REF)
1327 find_interesting_uses_op (data, array_ref_element_size (base));
1328 find_interesting_uses_op (data, array_ref_low_bound (base));
1330 return true;
1333 /* Finds addresses in *OP_P inside STMT. */
1335 static void
1336 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1338 tree base = unshare_expr (*op_p), step = NULL;
1339 struct iv *civ;
1340 struct ifs_ivopts_data ifs_ivopts_data;
1342 /* Ignore bitfields for now. Not really something terribly complicated
1343 to handle. TODO. */
1344 if (TREE_CODE (base) == COMPONENT_REF
1345 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1346 goto fail;
1348 ifs_ivopts_data.ivopts_data = data;
1349 ifs_ivopts_data.stmt = stmt;
1350 ifs_ivopts_data.step_p = &step;
1351 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1352 || zero_p (step))
1353 goto fail;
1355 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1356 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1358 if (TREE_CODE (base) == INDIRECT_REF)
1359 base = TREE_OPERAND (base, 0);
1360 else
1361 base = build_addr (base);
1363 civ = alloc_iv (base, step);
1364 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1365 return;
1367 fail:
1368 for_each_index (op_p, idx_record_use, data);
1371 /* Finds and records invariants used in STMT. */
1373 static void
1374 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1376 use_optype uses = NULL;
1377 unsigned i, n;
1378 tree op;
1380 if (TREE_CODE (stmt) == PHI_NODE)
1381 n = PHI_NUM_ARGS (stmt);
1382 else
1384 get_stmt_operands (stmt);
1385 uses = STMT_USE_OPS (stmt);
1386 n = NUM_USES (uses);
1389 for (i = 0; i < n; i++)
1391 if (TREE_CODE (stmt) == PHI_NODE)
1392 op = PHI_ARG_DEF (stmt, i);
1393 else
1394 op = USE_OP (uses, i);
1396 record_invariant (data, op, false);
1400 /* Finds interesting uses of induction variables in the statement STMT. */
1402 static void
1403 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1405 struct iv *iv;
1406 tree op, lhs, rhs;
1407 use_optype uses = NULL;
1408 unsigned i, n;
1410 find_invariants_stmt (data, stmt);
1412 if (TREE_CODE (stmt) == COND_EXPR)
1414 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1415 return;
1418 if (TREE_CODE (stmt) == MODIFY_EXPR)
1420 lhs = TREE_OPERAND (stmt, 0);
1421 rhs = TREE_OPERAND (stmt, 1);
1423 if (TREE_CODE (lhs) == SSA_NAME)
1425 /* If the statement defines an induction variable, the uses are not
1426 interesting by themselves. */
1428 iv = get_iv (data, lhs);
1430 if (iv && !zero_p (iv->step))
1431 return;
1434 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1436 case tcc_comparison:
1437 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1438 return;
1440 case tcc_reference:
1441 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1442 if (REFERENCE_CLASS_P (lhs))
1443 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1444 return;
1446 default: ;
1449 if (REFERENCE_CLASS_P (lhs)
1450 && is_gimple_val (rhs))
1452 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1453 find_interesting_uses_op (data, rhs);
1454 return;
1457 /* TODO -- we should also handle address uses of type
1459 memory = call (whatever);
1463 call (memory). */
1466 if (TREE_CODE (stmt) == PHI_NODE
1467 && bb_for_stmt (stmt) == data->current_loop->header)
1469 lhs = PHI_RESULT (stmt);
1470 iv = get_iv (data, lhs);
1472 if (iv && !zero_p (iv->step))
1473 return;
1476 if (TREE_CODE (stmt) == PHI_NODE)
1477 n = PHI_NUM_ARGS (stmt);
1478 else
1480 uses = STMT_USE_OPS (stmt);
1481 n = NUM_USES (uses);
1484 for (i = 0; i < n; i++)
1486 if (TREE_CODE (stmt) == PHI_NODE)
1487 op = PHI_ARG_DEF (stmt, i);
1488 else
1489 op = USE_OP (uses, i);
1491 if (TREE_CODE (op) != SSA_NAME)
1492 continue;
1494 iv = get_iv (data, op);
1495 if (!iv)
1496 continue;
1498 find_interesting_uses_op (data, op);
1502 /* Finds interesting uses of induction variables outside of loops
1503 on loop exit edge EXIT. */
1505 static void
1506 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1508 tree phi, def;
1510 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
1512 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1513 find_interesting_uses_outer (data, def);
1517 /* Finds uses of the induction variables that are interesting. */
1519 static void
1520 find_interesting_uses (struct ivopts_data *data)
1522 basic_block bb;
1523 block_stmt_iterator bsi;
1524 tree phi;
1525 basic_block *body = get_loop_body (data->current_loop);
1526 unsigned i;
1527 struct version_info *info;
1528 edge e;
1530 if (dump_file && (dump_flags & TDF_DETAILS))
1531 fprintf (dump_file, "Uses:\n\n");
1533 for (i = 0; i < data->current_loop->num_nodes; i++)
1535 edge_iterator ei;
1536 bb = body[i];
1538 FOR_EACH_EDGE (e, ei, bb->succs)
1539 if (e->dest != EXIT_BLOCK_PTR
1540 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1541 find_interesting_uses_outside (data, e);
1543 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
1544 find_interesting_uses_stmt (data, phi);
1545 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1546 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1549 if (dump_file && (dump_flags & TDF_DETAILS))
1551 bitmap_iterator bi;
1553 fprintf (dump_file, "\n");
1555 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1557 info = ver_info (data, i);
1558 if (info->inv_id)
1560 fprintf (dump_file, " ");
1561 print_generic_expr (dump_file, info->name, TDF_SLIM);
1562 fprintf (dump_file, " is invariant (%d)%s\n",
1563 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1567 fprintf (dump_file, "\n");
1570 free (body);
1573 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1574 position to POS. If USE is not NULL, the candidate is set as related to
1575 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1576 replacement of the final value of the iv by a direct computation. */
1578 static struct iv_cand *
1579 add_candidate_1 (struct ivopts_data *data,
1580 tree base, tree step, bool important, enum iv_position pos,
1581 struct iv_use *use, tree incremented_at)
1583 unsigned i;
1584 struct iv_cand *cand = NULL;
1585 tree type;
1587 if (base)
1589 type = TREE_TYPE (base);
1590 if (!TYPE_UNSIGNED (type))
1592 type = unsigned_type_for (type);
1593 base = fold_convert (type, base);
1594 if (step)
1595 step = fold_convert (type, step);
1599 for (i = 0; i < n_iv_cands (data); i++)
1601 cand = iv_cand (data, i);
1603 if (cand->pos != pos)
1604 continue;
1606 if (cand->incremented_at != incremented_at)
1607 continue;
1609 if (!cand->iv)
1611 if (!base && !step)
1612 break;
1614 continue;
1617 if (!base && !step)
1618 continue;
1620 if (!operand_equal_p (base, cand->iv->base, 0))
1621 continue;
1623 if (zero_p (cand->iv->step))
1625 if (zero_p (step))
1626 break;
1628 else
1630 if (step && operand_equal_p (step, cand->iv->step, 0))
1631 break;
1635 if (i == n_iv_cands (data))
1637 cand = xcalloc (1, sizeof (struct iv_cand));
1638 cand->id = i;
1640 if (!base && !step)
1641 cand->iv = NULL;
1642 else
1643 cand->iv = alloc_iv (base, step);
1645 cand->pos = pos;
1646 if (pos != IP_ORIGINAL && cand->iv)
1648 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1649 cand->var_after = cand->var_before;
1651 cand->important = important;
1652 cand->incremented_at = incremented_at;
1653 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1655 if (dump_file && (dump_flags & TDF_DETAILS))
1656 dump_cand (dump_file, cand);
1659 if (important && !cand->important)
1661 cand->important = true;
1662 if (dump_file && (dump_flags & TDF_DETAILS))
1663 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1666 if (use)
1668 bitmap_set_bit (use->related_cands, i);
1669 if (dump_file && (dump_flags & TDF_DETAILS))
1670 fprintf (dump_file, "Candidate %d is related to use %d\n",
1671 cand->id, use->id);
1674 return cand;
1677 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1678 position to POS. If USE is not NULL, the candidate is set as related to
1679 it. The candidate computation is scheduled on all available positions. */
1681 static void
1682 add_candidate (struct ivopts_data *data,
1683 tree base, tree step, bool important, struct iv_use *use)
1685 if (ip_normal_pos (data->current_loop))
1686 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
1687 if (ip_end_pos (data->current_loop))
1688 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
1691 /* Adds standard iv candidates. */
1693 static void
1694 add_standard_iv_candidates (struct ivopts_data *data)
1696 /* Add 0 + 1 * iteration candidate. */
1697 add_candidate (data,
1698 build_int_cst (unsigned_intSI_type_node, 0),
1699 build_int_cst (unsigned_intSI_type_node, 1),
1700 true, NULL);
1702 /* The same for a long type if it is still fast enough. */
1703 if (BITS_PER_WORD > 32)
1704 add_candidate (data,
1705 build_int_cst (unsigned_intDI_type_node, 0),
1706 build_int_cst (unsigned_intDI_type_node, 1),
1707 true, NULL);
1711 /* Adds candidates bases on the old induction variable IV. */
1713 static void
1714 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
1716 tree phi, def;
1717 struct iv_cand *cand;
1719 add_candidate (data, iv->base, iv->step, true, NULL);
1721 /* The same, but with initial value zero. */
1722 add_candidate (data,
1723 build_int_cst (TREE_TYPE (iv->base), 0),
1724 iv->step, true, NULL);
1726 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
1727 if (TREE_CODE (phi) == PHI_NODE)
1729 /* Additionally record the possibility of leaving the original iv
1730 untouched. */
1731 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
1732 cand = add_candidate_1 (data,
1733 iv->base, iv->step, true, IP_ORIGINAL, NULL,
1734 SSA_NAME_DEF_STMT (def));
1735 cand->var_before = iv->ssa_name;
1736 cand->var_after = def;
1740 /* Adds candidates based on the old induction variables. */
1742 static void
1743 add_old_ivs_candidates (struct ivopts_data *data)
1745 unsigned i;
1746 struct iv *iv;
1747 bitmap_iterator bi;
1749 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1751 iv = ver_info (data, i)->iv;
1752 if (iv && iv->biv_p && !zero_p (iv->step))
1753 add_old_iv_candidates (data, iv);
1757 /* Adds candidates based on the value of the induction variable IV and USE. */
1759 static void
1760 add_iv_value_candidates (struct ivopts_data *data,
1761 struct iv *iv, struct iv_use *use)
1763 add_candidate (data, iv->base, iv->step, false, use);
1765 /* The same, but with initial value zero. */
1766 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
1767 iv->step, false, use);
1770 /* Adds candidates based on the address IV and USE. */
1772 static void
1773 add_address_candidates (struct ivopts_data *data,
1774 struct iv *iv, struct iv_use *use)
1776 tree base, abase, tmp, *act;
1778 /* First, the trivial choices. */
1779 add_iv_value_candidates (data, iv, use);
1781 /* Second, try removing the COMPONENT_REFs. */
1782 if (TREE_CODE (iv->base) == ADDR_EXPR)
1784 base = TREE_OPERAND (iv->base, 0);
1785 while (TREE_CODE (base) == COMPONENT_REF
1786 || (TREE_CODE (base) == ARRAY_REF
1787 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
1788 base = TREE_OPERAND (base, 0);
1790 if (base != TREE_OPERAND (iv->base, 0))
1792 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1793 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1795 if (TREE_CODE (base) == INDIRECT_REF)
1796 base = TREE_OPERAND (base, 0);
1797 else
1798 base = build_addr (base);
1799 add_candidate (data, base, iv->step, false, use);
1803 /* Third, try removing the constant offset. */
1804 abase = iv->base;
1805 while (TREE_CODE (abase) == PLUS_EXPR
1806 && TREE_CODE (TREE_OPERAND (abase, 1)) != INTEGER_CST)
1807 abase = TREE_OPERAND (abase, 0);
1808 /* We found the offset, so make the copy of the non-shared part and
1809 remove it. */
1810 if (TREE_CODE (abase) == PLUS_EXPR)
1812 tmp = iv->base;
1813 act = &base;
1815 for (tmp = iv->base; tmp != abase; tmp = TREE_OPERAND (tmp, 0))
1817 *act = build2 (PLUS_EXPR, TREE_TYPE (tmp),
1818 NULL_TREE, TREE_OPERAND (tmp, 1));
1819 act = &TREE_OPERAND (*act, 0);
1821 *act = TREE_OPERAND (tmp, 0);
1823 add_candidate (data, base, iv->step, false, use);
1827 /* Possibly adds pseudocandidate for replacing the final value of USE by
1828 a direct computation. */
1830 static void
1831 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
1833 struct tree_niter_desc *niter;
1834 struct loop *loop = data->current_loop;
1836 /* We must know where we exit the loop and how many times does it roll. */
1837 if (!single_dom_exit (loop))
1838 return;
1840 niter = &loop_data (loop)->niter;
1841 if (!niter->niter
1842 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
1843 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
1844 return;
1846 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
1849 /* Adds candidates based on the uses. */
1851 static void
1852 add_derived_ivs_candidates (struct ivopts_data *data)
1854 unsigned i;
1856 for (i = 0; i < n_iv_uses (data); i++)
1858 struct iv_use *use = iv_use (data, i);
1860 if (!use)
1861 continue;
1863 switch (use->type)
1865 case USE_NONLINEAR_EXPR:
1866 case USE_COMPARE:
1867 /* Just add the ivs based on the value of the iv used here. */
1868 add_iv_value_candidates (data, use->iv, use);
1869 break;
1871 case USE_OUTER:
1872 add_iv_value_candidates (data, use->iv, use);
1874 /* Additionally, add the pseudocandidate for the possibility to
1875 replace the final value by a direct computation. */
1876 add_iv_outer_candidates (data, use);
1877 break;
1879 case USE_ADDRESS:
1880 add_address_candidates (data, use->iv, use);
1881 break;
1883 default:
1884 gcc_unreachable ();
1889 /* Finds the candidates for the induction variables. */
1891 static void
1892 find_iv_candidates (struct ivopts_data *data)
1894 /* Add commonly used ivs. */
1895 add_standard_iv_candidates (data);
1897 /* Add old induction variables. */
1898 add_old_ivs_candidates (data);
1900 /* Add induction variables derived from uses. */
1901 add_derived_ivs_candidates (data);
1904 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
1905 If consider_all_candidates is true, we use a two-dimensional array, otherwise
1906 we allocate a simple list to every use. */
1908 static void
1909 alloc_use_cost_map (struct ivopts_data *data)
1911 unsigned i, n_imp = 0, size, j;
1913 if (!data->consider_all_candidates)
1915 for (i = 0; i < n_iv_cands (data); i++)
1917 struct iv_cand *cand = iv_cand (data, i);
1918 if (cand->important)
1919 n_imp++;
1923 for (i = 0; i < n_iv_uses (data); i++)
1925 struct iv_use *use = iv_use (data, i);
1926 bitmap_iterator bi;
1928 if (data->consider_all_candidates)
1930 size = n_iv_cands (data);
1931 use->n_map_members = size;
1933 else
1935 size = n_imp;
1936 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
1938 size++;
1940 use->n_map_members = 0;
1943 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
1947 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
1948 on invariants DEPENDS_ON. */
1950 static void
1951 set_use_iv_cost (struct ivopts_data *data,
1952 struct iv_use *use, struct iv_cand *cand, unsigned cost,
1953 bitmap depends_on)
1955 if (cost == INFTY
1956 && depends_on)
1958 BITMAP_XFREE (depends_on);
1959 depends_on = NULL;
1962 if (data->consider_all_candidates)
1964 use->cost_map[cand->id].cand = cand;
1965 use->cost_map[cand->id].cost = cost;
1966 use->cost_map[cand->id].depends_on = depends_on;
1967 return;
1970 if (cost == INFTY)
1971 return;
1973 use->cost_map[use->n_map_members].cand = cand;
1974 use->cost_map[use->n_map_members].cost = cost;
1975 use->cost_map[use->n_map_members].depends_on = depends_on;
1976 use->n_map_members++;
1979 /* Gets cost of (USE, CANDIDATE) pair. Stores the bitmap of dependencies to
1980 DEPENDS_ON. */
1982 static unsigned
1983 get_use_iv_cost (struct ivopts_data *data,
1984 struct iv_use *use, struct iv_cand *cand, bitmap *depends_on)
1986 unsigned i;
1988 if (!cand)
1989 return INFTY;
1991 if (data->consider_all_candidates)
1992 i = cand->id;
1993 else
1995 for (i = 0; i < use->n_map_members; i++)
1996 if (use->cost_map[i].cand == cand)
1997 break;
1999 if (i == use->n_map_members)
2000 return INFTY;
2003 if (depends_on)
2004 *depends_on = use->cost_map[i].depends_on;
2005 return use->cost_map[i].cost;
2008 /* Returns estimate on cost of computing SEQ. */
2010 static unsigned
2011 seq_cost (rtx seq)
2013 unsigned cost = 0;
2014 rtx set;
2016 for (; seq; seq = NEXT_INSN (seq))
2018 set = single_set (seq);
2019 if (set)
2020 cost += rtx_cost (set, SET);
2021 else
2022 cost++;
2025 return cost;
2028 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2029 static rtx
2030 produce_memory_decl_rtl (tree obj, int *regno)
2032 rtx x;
2033 if (!obj)
2034 abort ();
2035 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2037 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2038 x = gen_rtx_SYMBOL_REF (Pmode, name);
2040 else
2041 x = gen_raw_REG (Pmode, (*regno)++);
2043 return gen_rtx_MEM (DECL_MODE (obj), x);
2046 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2047 walk_tree. DATA contains the actual fake register number. */
2049 static tree
2050 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2052 tree obj = NULL_TREE;
2053 rtx x = NULL_RTX;
2054 int *regno = data;
2056 switch (TREE_CODE (*expr_p))
2058 case ADDR_EXPR:
2059 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2060 (handled_component_p (*expr_p)
2061 || TREE_CODE (*expr_p) == REALPART_EXPR
2062 || TREE_CODE (*expr_p) == IMAGPART_EXPR);
2063 expr_p = &TREE_OPERAND (*expr_p, 0));
2064 obj = *expr_p;
2065 if (DECL_P (obj))
2066 x = produce_memory_decl_rtl (obj, regno);
2067 break;
2069 case SSA_NAME:
2070 *ws = 0;
2071 obj = SSA_NAME_VAR (*expr_p);
2072 if (!DECL_RTL_SET_P (obj))
2073 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2074 break;
2076 case VAR_DECL:
2077 case PARM_DECL:
2078 case RESULT_DECL:
2079 *ws = 0;
2080 obj = *expr_p;
2082 if (DECL_RTL_SET_P (obj))
2083 break;
2085 if (DECL_MODE (obj) == BLKmode)
2086 x = produce_memory_decl_rtl (obj, regno);
2087 else
2088 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2090 break;
2092 default:
2093 break;
2096 if (x)
2098 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
2099 SET_DECL_RTL (obj, x);
2102 return NULL_TREE;
2105 /* Determines cost of the computation of EXPR. */
2107 static unsigned
2108 computation_cost (tree expr)
2110 rtx seq, rslt;
2111 tree type = TREE_TYPE (expr);
2112 unsigned cost;
2113 int regno = 0;
2115 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2116 start_sequence ();
2117 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2118 seq = get_insns ();
2119 end_sequence ();
2121 cost = seq_cost (seq);
2122 if (GET_CODE (rslt) == MEM)
2123 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2125 return cost;
2128 /* Returns variable containing the value of candidate CAND at statement AT. */
2130 static tree
2131 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2133 if (stmt_after_increment (loop, cand, stmt))
2134 return cand->var_after;
2135 else
2136 return cand->var_before;
2139 /* Determines the expression by that USE is expressed from induction variable
2140 CAND at statement AT in LOOP. */
2142 static tree
2143 get_computation_at (struct loop *loop,
2144 struct iv_use *use, struct iv_cand *cand, tree at)
2146 tree ubase = use->iv->base;
2147 tree ustep = use->iv->step;
2148 tree cbase = cand->iv->base;
2149 tree cstep = cand->iv->step;
2150 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2151 tree uutype;
2152 tree expr, delta;
2153 tree ratio;
2154 unsigned HOST_WIDE_INT ustepi, cstepi;
2155 HOST_WIDE_INT ratioi;
2157 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2159 /* We do not have a precision to express the values of use. */
2160 return NULL_TREE;
2163 expr = var_at_stmt (loop, cand, at);
2165 if (TREE_TYPE (expr) != ctype)
2167 /* This may happen with the original ivs. */
2168 expr = fold_convert (ctype, expr);
2171 if (TYPE_UNSIGNED (utype))
2172 uutype = utype;
2173 else
2175 uutype = unsigned_type_for (utype);
2176 ubase = fold_convert (uutype, ubase);
2177 ustep = fold_convert (uutype, ustep);
2180 if (uutype != ctype)
2182 expr = fold_convert (uutype, expr);
2183 cbase = fold_convert (uutype, cbase);
2184 cstep = fold_convert (uutype, cstep);
2187 if (!cst_and_fits_in_hwi (cstep)
2188 || !cst_and_fits_in_hwi (ustep))
2189 return NULL_TREE;
2191 ustepi = int_cst_value (ustep);
2192 cstepi = int_cst_value (cstep);
2194 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2196 /* TODO maybe consider case when ustep divides cstep and the ratio is
2197 a power of 2 (so that the division is fast to execute)? We would
2198 need to be much more careful with overflows etc. then. */
2199 return NULL_TREE;
2202 /* We may need to shift the value if we are after the increment. */
2203 if (stmt_after_increment (loop, cand, at))
2204 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2206 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2207 or |ratio| == 1, it is better to handle this like
2209 ubase - ratio * cbase + ratio * var. */
2211 if (ratioi == 1)
2213 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2214 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2216 else if (ratioi == -1)
2218 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2219 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2221 else if (TREE_CODE (cbase) == INTEGER_CST)
2223 ratio = build_int_cst_type (uutype, ratioi);
2224 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2225 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2226 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2227 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2229 else
2231 expr = fold (build2 (MINUS_EXPR, uutype, expr, cbase));
2232 ratio = build_int_cst_type (uutype, ratioi);
2233 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2234 expr = fold (build2 (PLUS_EXPR, uutype, ubase, expr));
2237 return fold_convert (utype, expr);
2240 /* Determines the expression by that USE is expressed from induction variable
2241 CAND in LOOP. */
2243 static tree
2244 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2246 return get_computation_at (loop, use, cand, use->stmt);
2249 /* Strips constant offsets from EXPR and adds them to OFFSET. */
2251 static void
2252 strip_offset (tree *expr, unsigned HOST_WIDE_INT *offset)
2254 tree op0, op1;
2255 enum tree_code code;
2257 while (1)
2259 if (cst_and_fits_in_hwi (*expr))
2261 *offset += int_cst_value (*expr);
2262 *expr = integer_zero_node;
2263 return;
2266 code = TREE_CODE (*expr);
2268 if (code != PLUS_EXPR && code != MINUS_EXPR)
2269 return;
2271 op0 = TREE_OPERAND (*expr, 0);
2272 op1 = TREE_OPERAND (*expr, 1);
2274 if (cst_and_fits_in_hwi (op1))
2276 if (code == PLUS_EXPR)
2277 *offset += int_cst_value (op1);
2278 else
2279 *offset -= int_cst_value (op1);
2281 *expr = op0;
2282 continue;
2285 if (code != PLUS_EXPR)
2286 return;
2288 if (!cst_and_fits_in_hwi (op0))
2289 return;
2291 *offset += int_cst_value (op0);
2292 *expr = op1;
2296 /* Returns cost of addition in MODE. */
2298 static unsigned
2299 add_cost (enum machine_mode mode)
2301 static unsigned costs[NUM_MACHINE_MODES];
2302 rtx seq;
2303 unsigned cost;
2305 if (costs[mode])
2306 return costs[mode];
2308 start_sequence ();
2309 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2310 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2311 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2312 NULL_RTX);
2313 seq = get_insns ();
2314 end_sequence ();
2316 cost = seq_cost (seq);
2317 if (!cost)
2318 cost = 1;
2320 costs[mode] = cost;
2322 if (dump_file && (dump_flags & TDF_DETAILS))
2323 fprintf (dump_file, "Addition in %s costs %d\n",
2324 GET_MODE_NAME (mode), cost);
2325 return cost;
2328 /* Entry in a hashtable of already known costs for multiplication. */
2329 struct mbc_entry
2331 HOST_WIDE_INT cst; /* The constant to multiply by. */
2332 enum machine_mode mode; /* In mode. */
2333 unsigned cost; /* The cost. */
2336 /* Counts hash value for the ENTRY. */
2338 static hashval_t
2339 mbc_entry_hash (const void *entry)
2341 const struct mbc_entry *e = entry;
2343 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2346 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2348 static int
2349 mbc_entry_eq (const void *entry1, const void *entry2)
2351 const struct mbc_entry *e1 = entry1;
2352 const struct mbc_entry *e2 = entry2;
2354 return (e1->mode == e2->mode
2355 && e1->cst == e2->cst);
2358 /* Returns cost of multiplication by constant CST in MODE. */
2360 static unsigned
2361 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2363 static htab_t costs;
2364 struct mbc_entry **cached, act;
2365 rtx seq;
2366 unsigned cost;
2368 if (!costs)
2369 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2371 act.mode = mode;
2372 act.cst = cst;
2373 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2374 if (*cached)
2375 return (*cached)->cost;
2377 *cached = xmalloc (sizeof (struct mbc_entry));
2378 (*cached)->mode = mode;
2379 (*cached)->cst = cst;
2381 start_sequence ();
2382 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2383 NULL_RTX, 0);
2384 seq = get_insns ();
2385 end_sequence ();
2387 cost = seq_cost (seq);
2389 if (dump_file && (dump_flags & TDF_DETAILS))
2390 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2391 (int) cst, GET_MODE_NAME (mode), cost);
2393 (*cached)->cost = cost;
2395 return cost;
2398 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2399 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2400 variable is omitted. The created memory accesses MODE.
2402 TODO -- there must be some better way. This all is quite crude. */
2404 static unsigned
2405 get_address_cost (bool symbol_present, bool var_present,
2406 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2408 #define MAX_RATIO 128
2409 static sbitmap valid_mult;
2410 static HOST_WIDE_INT rat, off;
2411 static HOST_WIDE_INT min_offset, max_offset;
2412 static unsigned costs[2][2][2][2];
2413 unsigned cost, acost;
2414 rtx seq, addr, base;
2415 bool offset_p, ratio_p;
2416 rtx reg1;
2417 HOST_WIDE_INT s_offset;
2418 unsigned HOST_WIDE_INT mask;
2419 unsigned bits;
2421 if (!valid_mult)
2423 HOST_WIDE_INT i;
2425 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2427 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2428 for (i = 1; i <= 1 << 20; i <<= 1)
2430 XEXP (addr, 1) = GEN_INT (i);
2431 if (!memory_address_p (Pmode, addr))
2432 break;
2434 max_offset = i >> 1;
2435 off = max_offset;
2437 for (i = 1; i <= 1 << 20; i <<= 1)
2439 XEXP (addr, 1) = GEN_INT (-i);
2440 if (!memory_address_p (Pmode, addr))
2441 break;
2443 min_offset = -(i >> 1);
2445 if (dump_file && (dump_flags & TDF_DETAILS))
2447 fprintf (dump_file, "get_address_cost:\n");
2448 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2449 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2452 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2453 sbitmap_zero (valid_mult);
2454 rat = 1;
2455 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2456 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2458 XEXP (addr, 1) = GEN_INT (i);
2459 if (memory_address_p (Pmode, addr))
2461 SET_BIT (valid_mult, i + MAX_RATIO);
2462 rat = i;
2466 if (dump_file && (dump_flags & TDF_DETAILS))
2468 fprintf (dump_file, " allowed multipliers:");
2469 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2470 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2471 fprintf (dump_file, " %d", (int) i);
2472 fprintf (dump_file, "\n");
2473 fprintf (dump_file, "\n");
2477 bits = GET_MODE_BITSIZE (Pmode);
2478 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2479 offset &= mask;
2480 if ((offset >> (bits - 1) & 1))
2481 offset |= ~mask;
2482 s_offset = offset;
2484 cost = 0;
2485 offset_p = (min_offset <= s_offset && s_offset <= max_offset);
2486 ratio_p = (ratio != 1
2487 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2488 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2490 if (ratio != 1 && !ratio_p)
2491 cost += multiply_by_cost (ratio, Pmode);
2493 if (s_offset && !offset_p && !symbol_present)
2495 cost += add_cost (Pmode);
2496 var_present = true;
2499 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2500 if (!acost)
2502 acost = 0;
2504 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2505 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2506 if (ratio_p)
2507 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2509 if (symbol_present)
2511 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2512 if (offset_p)
2513 base = gen_rtx_fmt_e (CONST, Pmode,
2514 gen_rtx_fmt_ee (PLUS, Pmode,
2515 base,
2516 GEN_INT (off)));
2517 if (var_present)
2518 base = gen_rtx_fmt_ee (PLUS, Pmode, reg1, base);
2521 else if (var_present)
2523 base = reg1;
2524 if (offset_p)
2525 base = gen_rtx_fmt_ee (PLUS, Pmode, base, GEN_INT (off));
2527 else if (offset_p)
2528 base = GEN_INT (off);
2529 else
2530 base = NULL_RTX;
2532 if (base)
2533 addr = gen_rtx_fmt_ee (PLUS, Pmode, base, addr);
2535 start_sequence ();
2536 addr = memory_address (Pmode, addr);
2537 seq = get_insns ();
2538 end_sequence ();
2540 acost = seq_cost (seq);
2541 acost += address_cost (addr, Pmode);
2543 if (!acost)
2544 acost = 1;
2545 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2548 return cost + acost;
2551 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2552 the bitmap to that we should store it. */
2554 static struct ivopts_data *fd_ivopts_data;
2555 static tree
2556 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2558 bitmap *depends_on = data;
2559 struct version_info *info;
2561 if (TREE_CODE (*expr_p) != SSA_NAME)
2562 return NULL_TREE;
2563 info = name_info (fd_ivopts_data, *expr_p);
2565 if (!info->inv_id || info->has_nonlin_use)
2566 return NULL_TREE;
2568 if (!*depends_on)
2569 *depends_on = BITMAP_XMALLOC ();
2570 bitmap_set_bit (*depends_on, info->inv_id);
2572 return NULL_TREE;
2575 /* Estimates cost of forcing EXPR into variable. DEPENDS_ON is a set of the
2576 invariants the computation depends on. */
2578 static unsigned
2579 force_var_cost (struct ivopts_data *data,
2580 tree expr, bitmap *depends_on)
2582 static bool costs_initialized = false;
2583 static unsigned integer_cost;
2584 static unsigned symbol_cost;
2585 static unsigned address_cost;
2587 if (!costs_initialized)
2589 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2590 rtx x = gen_rtx_MEM (DECL_MODE (var),
2591 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2592 tree addr;
2593 tree type = build_pointer_type (integer_type_node);
2595 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2596 2000));
2598 SET_DECL_RTL (var, x);
2599 TREE_STATIC (var) = 1;
2600 addr = build1 (ADDR_EXPR, type, var);
2601 symbol_cost = computation_cost (addr) + 1;
2603 address_cost
2604 = computation_cost (build2 (PLUS_EXPR, type,
2605 addr,
2606 build_int_cst_type (type, 2000))) + 1;
2607 if (dump_file && (dump_flags & TDF_DETAILS))
2609 fprintf (dump_file, "force_var_cost:\n");
2610 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2611 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2612 fprintf (dump_file, " address %d\n", (int) address_cost);
2613 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2614 fprintf (dump_file, "\n");
2617 costs_initialized = true;
2620 if (depends_on)
2622 fd_ivopts_data = data;
2623 walk_tree (&expr, find_depends, depends_on, NULL);
2626 if (SSA_VAR_P (expr))
2627 return 0;
2629 if (TREE_INVARIANT (expr))
2631 if (TREE_CODE (expr) == INTEGER_CST)
2632 return integer_cost;
2634 if (TREE_CODE (expr) == ADDR_EXPR)
2636 tree obj = TREE_OPERAND (expr, 0);
2638 if (TREE_CODE (obj) == VAR_DECL
2639 || TREE_CODE (obj) == PARM_DECL
2640 || TREE_CODE (obj) == RESULT_DECL)
2641 return symbol_cost;
2644 return address_cost;
2647 /* Just an arbitrary value, FIXME. */
2648 return target_spill_cost;
2651 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2652 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2653 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2654 invariants the computation depends on. */
2656 static unsigned
2657 split_address_cost (struct ivopts_data *data,
2658 tree addr, bool *symbol_present, bool *var_present,
2659 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2661 tree core;
2662 HOST_WIDE_INT bitsize;
2663 HOST_WIDE_INT bitpos;
2664 tree toffset;
2665 enum machine_mode mode;
2666 int unsignedp, volatilep;
2668 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
2669 &unsignedp, &volatilep);
2671 if (toffset != 0
2672 || bitpos % BITS_PER_UNIT != 0
2673 || TREE_CODE (core) != VAR_DECL)
2675 *symbol_present = false;
2676 *var_present = true;
2677 fd_ivopts_data = data;
2678 walk_tree (&addr, find_depends, depends_on, NULL);
2679 return target_spill_cost;
2682 *offset += bitpos / BITS_PER_UNIT;
2683 if (TREE_STATIC (core)
2684 || DECL_EXTERNAL (core))
2686 *symbol_present = true;
2687 *var_present = false;
2688 return 0;
2691 *symbol_present = false;
2692 *var_present = true;
2693 return 0;
2696 /* Estimates cost of expressing difference of addresses E1 - E2 as
2697 var + symbol + offset. The value of offset is added to OFFSET,
2698 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2699 part is missing. DEPENDS_ON is a set of the invariants the computation
2700 depends on. */
2702 static unsigned
2703 ptr_difference_cost (struct ivopts_data *data,
2704 tree e1, tree e2, bool *symbol_present, bool *var_present,
2705 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2707 HOST_WIDE_INT diff = 0;
2708 unsigned cost;
2710 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
2712 if (TREE_CODE (e2) == ADDR_EXPR
2713 && ptr_difference_const (TREE_OPERAND (e1, 0),
2714 TREE_OPERAND (e2, 0), &diff))
2716 *offset += diff;
2717 *symbol_present = false;
2718 *var_present = false;
2719 return 0;
2722 if (e2 == integer_zero_node)
2723 return split_address_cost (data, TREE_OPERAND (e1, 0),
2724 symbol_present, var_present, offset, depends_on);
2726 *symbol_present = false;
2727 *var_present = true;
2729 cost = force_var_cost (data, e1, depends_on);
2730 cost += force_var_cost (data, e2, depends_on);
2731 cost += add_cost (Pmode);
2733 return cost;
2736 /* Estimates cost of expressing difference E1 - E2 as
2737 var + symbol + offset. The value of offset is added to OFFSET,
2738 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2739 part is missing. DEPENDS_ON is a set of the invariants the computation
2740 depends on. */
2742 static unsigned
2743 difference_cost (struct ivopts_data *data,
2744 tree e1, tree e2, bool *symbol_present, bool *var_present,
2745 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2747 unsigned cost;
2748 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
2750 strip_offset (&e1, offset);
2751 *offset = -*offset;
2752 strip_offset (&e2, offset);
2753 *offset = -*offset;
2755 if (TREE_CODE (e1) == ADDR_EXPR)
2756 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
2757 depends_on);
2758 *symbol_present = false;
2760 if (operand_equal_p (e1, e2, 0))
2762 *var_present = false;
2763 return 0;
2765 *var_present = true;
2766 if (zero_p (e2))
2767 return force_var_cost (data, e1, depends_on);
2769 if (zero_p (e1))
2771 cost = force_var_cost (data, e2, depends_on);
2772 cost += multiply_by_cost (-1, mode);
2774 return cost;
2777 cost = force_var_cost (data, e1, depends_on);
2778 cost += force_var_cost (data, e2, depends_on);
2779 cost += add_cost (mode);
2781 return cost;
2784 /* Determines the cost of the computation by that USE is expressed
2785 from induction variable CAND. If ADDRESS_P is true, we just need
2786 to create an address from it, otherwise we want to get it into
2787 register. A set of invariants we depend on is stored in
2788 DEPENDS_ON. AT is the statement at that the value is computed. */
2790 static unsigned
2791 get_computation_cost_at (struct ivopts_data *data,
2792 struct iv_use *use, struct iv_cand *cand,
2793 bool address_p, bitmap *depends_on, tree at)
2795 tree ubase = use->iv->base, ustep = use->iv->step;
2796 tree cbase, cstep;
2797 tree utype = TREE_TYPE (ubase), ctype;
2798 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
2799 HOST_WIDE_INT ratio, aratio;
2800 bool var_present, symbol_present;
2801 unsigned cost = 0, n_sums;
2803 *depends_on = NULL;
2805 /* Only consider real candidates. */
2806 if (!cand->iv)
2807 return INFTY;
2809 cbase = cand->iv->base;
2810 cstep = cand->iv->step;
2811 ctype = TREE_TYPE (cbase);
2813 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2815 /* We do not have a precision to express the values of use. */
2816 return INFTY;
2819 if (address_p)
2821 /* Do not try to express address of an object with computation based
2822 on address of a different object. This may cause problems in rtl
2823 level alias analysis (that does not expect this to be happening,
2824 as this is illegal in C), and would be unlikely to be useful
2825 anyway. */
2826 if (use->iv->base_object
2827 && cand->iv->base_object
2828 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
2829 return INFTY;
2832 if (!cst_and_fits_in_hwi (ustep)
2833 || !cst_and_fits_in_hwi (cstep))
2834 return INFTY;
2836 if (TREE_CODE (ubase) == INTEGER_CST
2837 && !cst_and_fits_in_hwi (ubase))
2838 goto fallback;
2840 if (TREE_CODE (cbase) == INTEGER_CST
2841 && !cst_and_fits_in_hwi (cbase))
2842 goto fallback;
2844 ustepi = int_cst_value (ustep);
2845 cstepi = int_cst_value (cstep);
2847 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
2849 /* TODO -- add direct handling of this case. */
2850 goto fallback;
2853 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
2854 return INFTY;
2856 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2857 or ratio == 1, it is better to handle this like
2859 ubase - ratio * cbase + ratio * var
2861 (also holds in the case ratio == -1, TODO. */
2863 if (TREE_CODE (cbase) == INTEGER_CST)
2865 offset = - ratio * int_cst_value (cbase);
2866 cost += difference_cost (data,
2867 ubase, integer_zero_node,
2868 &symbol_present, &var_present, &offset,
2869 depends_on);
2871 else if (ratio == 1)
2873 cost += difference_cost (data,
2874 ubase, cbase,
2875 &symbol_present, &var_present, &offset,
2876 depends_on);
2878 else
2880 cost += force_var_cost (data, cbase, depends_on);
2881 cost += add_cost (TYPE_MODE (ctype));
2882 cost += difference_cost (data,
2883 ubase, integer_zero_node,
2884 &symbol_present, &var_present, &offset,
2885 depends_on);
2888 /* If we are after the increment, the value of the candidate is higher by
2889 one iteration. */
2890 if (stmt_after_increment (data->current_loop, cand, at))
2891 offset -= ratio * cstepi;
2893 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
2894 (symbol/var/const parts may be omitted). If we are looking for an address,
2895 find the cost of addressing this. */
2896 if (address_p)
2897 return get_address_cost (symbol_present, var_present, offset, ratio);
2899 /* Otherwise estimate the costs for computing the expression. */
2900 aratio = ratio > 0 ? ratio : -ratio;
2901 if (!symbol_present && !var_present && !offset)
2903 if (ratio != 1)
2904 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
2906 return cost;
2909 if (aratio != 1)
2910 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
2912 n_sums = 1;
2913 if (var_present
2914 /* Symbol + offset should be compile-time computable. */
2915 && (symbol_present || offset))
2916 n_sums++;
2918 return cost + n_sums * add_cost (TYPE_MODE (ctype));
2920 fallback:
2922 /* Just get the expression, expand it and measure the cost. */
2923 tree comp = get_computation_at (data->current_loop, use, cand, at);
2925 if (!comp)
2926 return INFTY;
2928 if (address_p)
2929 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
2931 return computation_cost (comp);
2935 /* Determines the cost of the computation by that USE is expressed
2936 from induction variable CAND. If ADDRESS_P is true, we just need
2937 to create an address from it, otherwise we want to get it into
2938 register. A set of invariants we depend on is stored in
2939 DEPENDS_ON. */
2941 static unsigned
2942 get_computation_cost (struct ivopts_data *data,
2943 struct iv_use *use, struct iv_cand *cand,
2944 bool address_p, bitmap *depends_on)
2946 return get_computation_cost_at (data,
2947 use, cand, address_p, depends_on, use->stmt);
2950 /* Determines cost of basing replacement of USE on CAND in a generic
2951 expression. */
2953 static void
2954 determine_use_iv_cost_generic (struct ivopts_data *data,
2955 struct iv_use *use, struct iv_cand *cand)
2957 bitmap depends_on;
2958 unsigned cost = get_computation_cost (data, use, cand, false, &depends_on);
2960 set_use_iv_cost (data, use, cand, cost, depends_on);
2963 /* Determines cost of basing replacement of USE on CAND in an address. */
2965 static void
2966 determine_use_iv_cost_address (struct ivopts_data *data,
2967 struct iv_use *use, struct iv_cand *cand)
2969 bitmap depends_on;
2970 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
2972 set_use_iv_cost (data, use, cand, cost, depends_on);
2975 /* Computes value of induction variable IV in iteration NITER. */
2977 static tree
2978 iv_value (struct iv *iv, tree niter)
2980 tree val;
2981 tree type = TREE_TYPE (iv->base);
2983 niter = fold_convert (type, niter);
2984 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
2986 return fold (build2 (PLUS_EXPR, type, iv->base, val));
2989 /* Computes value of candidate CAND at position AT in iteration NITER. */
2991 static tree
2992 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
2994 tree val = iv_value (cand->iv, niter);
2995 tree type = TREE_TYPE (cand->iv->base);
2997 if (stmt_after_increment (loop, cand, at))
2998 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3000 return val;
3003 /* Check whether it is possible to express the condition in USE by comparison
3004 of candidate CAND. If so, store the comparison code to COMPARE and the
3005 value compared with to BOUND. */
3007 static bool
3008 may_eliminate_iv (struct loop *loop,
3009 struct iv_use *use, struct iv_cand *cand,
3010 enum tree_code *compare, tree *bound)
3012 basic_block ex_bb;
3013 edge exit;
3014 struct tree_niter_desc niter, new_niter;
3015 tree wider_type, type, base;
3017 /* For now works only for exits that dominate the loop latch. TODO -- extend
3018 for other conditions inside loop body. */
3019 ex_bb = bb_for_stmt (use->stmt);
3020 if (use->stmt != last_stmt (ex_bb)
3021 || TREE_CODE (use->stmt) != COND_EXPR)
3022 return false;
3023 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3024 return false;
3026 exit = EDGE_SUCC (ex_bb, 0);
3027 if (flow_bb_inside_loop_p (loop, exit->dest))
3028 exit = EDGE_SUCC (ex_bb, 1);
3029 if (flow_bb_inside_loop_p (loop, exit->dest))
3030 return false;
3032 niter.niter = NULL_TREE;
3033 number_of_iterations_exit (loop, exit, &niter);
3034 if (!niter.niter
3035 || !integer_nonzerop (niter.assumptions)
3036 || !integer_zerop (niter.may_be_zero))
3037 return false;
3039 if (exit->flags & EDGE_TRUE_VALUE)
3040 *compare = EQ_EXPR;
3041 else
3042 *compare = NE_EXPR;
3044 *bound = cand_value_at (loop, cand, use->stmt, niter.niter);
3046 /* Let us check there is not some problem with overflows, by checking that
3047 the number of iterations is unchanged. */
3048 base = cand->iv->base;
3049 type = TREE_TYPE (base);
3050 if (stmt_after_increment (loop, cand, use->stmt))
3051 base = fold (build2 (PLUS_EXPR, type, base, cand->iv->step));
3053 new_niter.niter = NULL_TREE;
3054 number_of_iterations_cond (TREE_TYPE (cand->iv->base), base,
3055 cand->iv->step, NE_EXPR, *bound, NULL_TREE,
3056 &new_niter);
3057 if (!new_niter.niter
3058 || !integer_nonzerop (new_niter.assumptions)
3059 || !integer_zerop (new_niter.may_be_zero))
3060 return false;
3062 wider_type = TREE_TYPE (new_niter.niter);
3063 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (TREE_TYPE (niter.niter)))
3064 wider_type = TREE_TYPE (niter.niter);
3065 if (!operand_equal_p (fold_convert (wider_type, niter.niter),
3066 fold_convert (wider_type, new_niter.niter), 0))
3067 return false;
3069 return true;
3072 /* Determines cost of basing replacement of USE on CAND in a condition. */
3074 static void
3075 determine_use_iv_cost_condition (struct ivopts_data *data,
3076 struct iv_use *use, struct iv_cand *cand)
3078 tree bound;
3079 enum tree_code compare;
3081 /* Only consider real candidates. */
3082 if (!cand->iv)
3084 set_use_iv_cost (data, use, cand, INFTY, NULL);
3085 return;
3088 if (may_eliminate_iv (data->current_loop, use, cand, &compare, &bound))
3090 bitmap depends_on = NULL;
3091 unsigned cost = force_var_cost (data, bound, &depends_on);
3093 set_use_iv_cost (data, use, cand, cost, depends_on);
3094 return;
3097 /* The induction variable elimination failed; just express the original
3098 giv. If it is compared with an invariant, note that we cannot get
3099 rid of it. */
3100 if (TREE_CODE (*use->op_p) == SSA_NAME)
3101 record_invariant (data, *use->op_p, true);
3102 else
3104 record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
3105 record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
3108 determine_use_iv_cost_generic (data, use, cand);
3111 /* Checks whether it is possible to replace the final value of USE by
3112 a direct computation. If so, the formula is stored to *VALUE. */
3114 static bool
3115 may_replace_final_value (struct loop *loop, struct iv_use *use, tree *value)
3117 edge exit;
3118 struct tree_niter_desc *niter;
3120 exit = single_dom_exit (loop);
3121 if (!exit)
3122 return false;
3124 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3125 bb_for_stmt (use->stmt)));
3127 niter = &loop_data (loop)->niter;
3128 if (!niter->niter
3129 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
3130 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
3131 return false;
3133 *value = iv_value (use->iv, niter->niter);
3135 return true;
3138 /* Determines cost of replacing final value of USE using CAND. */
3140 static void
3141 determine_use_iv_cost_outer (struct ivopts_data *data,
3142 struct iv_use *use, struct iv_cand *cand)
3144 bitmap depends_on;
3145 unsigned cost;
3146 edge exit;
3147 tree value;
3148 struct loop *loop = data->current_loop;
3150 if (!cand->iv)
3152 if (!may_replace_final_value (loop, use, &value))
3154 set_use_iv_cost (data, use, cand, INFTY, NULL);
3155 return;
3158 depends_on = NULL;
3159 cost = force_var_cost (data, value, &depends_on);
3161 cost /= AVG_LOOP_NITER (loop);
3163 set_use_iv_cost (data, use, cand, cost, depends_on);
3164 return;
3167 exit = single_dom_exit (loop);
3168 if (exit)
3170 /* If there is just a single exit, we may use value of the candidate
3171 after we take it to determine the value of use. */
3172 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3173 last_stmt (exit->src));
3174 if (cost != INFTY)
3175 cost /= AVG_LOOP_NITER (loop);
3177 else
3179 /* Otherwise we just need to compute the iv. */
3180 cost = get_computation_cost (data, use, cand, false, &depends_on);
3183 set_use_iv_cost (data, use, cand, cost, depends_on);
3186 /* Determines cost of basing replacement of USE on CAND. */
3188 static void
3189 determine_use_iv_cost (struct ivopts_data *data,
3190 struct iv_use *use, struct iv_cand *cand)
3192 switch (use->type)
3194 case USE_NONLINEAR_EXPR:
3195 determine_use_iv_cost_generic (data, use, cand);
3196 break;
3198 case USE_OUTER:
3199 determine_use_iv_cost_outer (data, use, cand);
3200 break;
3202 case USE_ADDRESS:
3203 determine_use_iv_cost_address (data, use, cand);
3204 break;
3206 case USE_COMPARE:
3207 determine_use_iv_cost_condition (data, use, cand);
3208 break;
3210 default:
3211 gcc_unreachable ();
3215 /* Determines costs of basing the use of the iv on an iv candidate. */
3217 static void
3218 determine_use_iv_costs (struct ivopts_data *data)
3220 unsigned i, j;
3221 struct iv_use *use;
3222 struct iv_cand *cand;
3224 data->consider_all_candidates = (n_iv_cands (data)
3225 <= CONSIDER_ALL_CANDIDATES_BOUND);
3227 alloc_use_cost_map (data);
3229 if (!data->consider_all_candidates)
3231 /* Add the important candidate entries. */
3232 for (j = 0; j < n_iv_cands (data); j++)
3234 cand = iv_cand (data, j);
3235 if (!cand->important)
3236 continue;
3237 for (i = 0; i < n_iv_uses (data); i++)
3239 use = iv_use (data, i);
3240 determine_use_iv_cost (data, use, cand);
3245 for (i = 0; i < n_iv_uses (data); i++)
3247 use = iv_use (data, i);
3249 if (data->consider_all_candidates)
3251 for (j = 0; j < n_iv_cands (data); j++)
3253 cand = iv_cand (data, j);
3254 determine_use_iv_cost (data, use, cand);
3257 else
3259 bitmap_iterator bi;
3261 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3263 cand = iv_cand (data, j);
3264 if (!cand->important)
3265 determine_use_iv_cost (data, use, cand);
3270 if (dump_file && (dump_flags & TDF_DETAILS))
3272 fprintf (dump_file, "Use-candidate costs:\n");
3274 for (i = 0; i < n_iv_uses (data); i++)
3276 use = iv_use (data, i);
3278 fprintf (dump_file, "Use %d:\n", i);
3279 fprintf (dump_file, " cand\tcost\tdepends on\n");
3280 for (j = 0; j < use->n_map_members; j++)
3282 if (!use->cost_map[j].cand
3283 || use->cost_map[j].cost == INFTY)
3284 continue;
3286 fprintf (dump_file, " %d\t%d\t",
3287 use->cost_map[j].cand->id,
3288 use->cost_map[j].cost);
3289 if (use->cost_map[j].depends_on)
3290 bitmap_print (dump_file,
3291 use->cost_map[j].depends_on, "","");
3292 fprintf (dump_file, "\n");
3295 fprintf (dump_file, "\n");
3297 fprintf (dump_file, "\n");
3301 /* Determines cost of the candidate CAND. */
3303 static void
3304 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3306 unsigned cost_base, cost_step;
3307 tree base, last;
3308 basic_block bb;
3310 if (!cand->iv)
3312 cand->cost = 0;
3313 return;
3316 /* There are two costs associated with the candidate -- its increment
3317 and its initialization. The second is almost negligible for any loop
3318 that rolls enough, so we take it just very little into account. */
3320 base = cand->iv->base;
3321 cost_base = force_var_cost (data, base, NULL);
3322 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3324 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3326 /* Prefer the original iv unless we may gain something by replacing it. */
3327 if (cand->pos == IP_ORIGINAL)
3328 cand->cost--;
3330 /* Prefer not to insert statements into latch unless there are some
3331 already (so that we do not create unnecessary jumps). */
3332 if (cand->pos == IP_END)
3334 bb = ip_end_pos (data->current_loop);
3335 last = last_stmt (bb);
3337 if (!last
3338 || TREE_CODE (last) == LABEL_EXPR)
3339 cand->cost++;
3343 /* Determines costs of computation of the candidates. */
3345 static void
3346 determine_iv_costs (struct ivopts_data *data)
3348 unsigned i;
3350 if (dump_file && (dump_flags & TDF_DETAILS))
3352 fprintf (dump_file, "Candidate costs:\n");
3353 fprintf (dump_file, " cand\tcost\n");
3356 for (i = 0; i < n_iv_cands (data); i++)
3358 struct iv_cand *cand = iv_cand (data, i);
3360 determine_iv_cost (data, cand);
3362 if (dump_file && (dump_flags & TDF_DETAILS))
3363 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3366 if (dump_file && (dump_flags & TDF_DETAILS))
3367 fprintf (dump_file, "\n");
3370 /* Calculates cost for having SIZE induction variables. */
3372 static unsigned
3373 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3375 return global_cost_for_size (size,
3376 loop_data (data->current_loop)->regs_used,
3377 n_iv_uses (data));
3380 /* For each size of the induction variable set determine the penalty. */
3382 static void
3383 determine_set_costs (struct ivopts_data *data)
3385 unsigned j, n;
3386 tree phi, op;
3387 struct loop *loop = data->current_loop;
3388 bitmap_iterator bi;
3390 /* We use the following model (definitely improvable, especially the
3391 cost function -- TODO):
3393 We estimate the number of registers available (using MD data), name it A.
3395 We estimate the number of registers used by the loop, name it U. This
3396 number is obtained as the number of loop phi nodes (not counting virtual
3397 registers and bivs) + the number of variables from outside of the loop.
3399 We set a reserve R (free regs that are used for temporary computations,
3400 etc.). For now the reserve is a constant 3.
3402 Let I be the number of induction variables.
3404 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3405 make a lot of ivs without a reason).
3406 -- if A - R < U + I <= A, the cost is I * PRES_COST
3407 -- if U + I > A, the cost is I * PRES_COST and
3408 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3410 if (dump_file && (dump_flags & TDF_DETAILS))
3412 fprintf (dump_file, "Global costs:\n");
3413 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3414 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3415 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3416 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3419 n = 0;
3420 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
3422 op = PHI_RESULT (phi);
3424 if (!is_gimple_reg (op))
3425 continue;
3427 if (get_iv (data, op))
3428 continue;
3430 n++;
3433 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3435 struct version_info *info = ver_info (data, j);
3437 if (info->inv_id && info->has_nonlin_use)
3438 n++;
3441 loop_data (loop)->regs_used = n;
3442 if (dump_file && (dump_flags & TDF_DETAILS))
3443 fprintf (dump_file, " regs_used %d\n", n);
3445 if (dump_file && (dump_flags & TDF_DETAILS))
3447 fprintf (dump_file, " cost for size:\n");
3448 fprintf (dump_file, " ivs\tcost\n");
3449 for (j = 0; j <= 2 * target_avail_regs; j++)
3450 fprintf (dump_file, " %d\t%d\n", j,
3451 ivopts_global_cost_for_size (data, j));
3452 fprintf (dump_file, "\n");
3456 /* Finds a best candidate for USE and stores it to CAND. The candidates are
3457 taken from the set SOL and they may depend on invariants in the set INV.
3458 The really used candidate and invariants are noted to USED_IVS and
3459 USED_INV. */
3461 static unsigned
3462 find_best_candidate (struct ivopts_data *data,
3463 struct iv_use *use, bitmap sol, bitmap inv,
3464 bitmap used_ivs, bitmap used_inv, struct iv_cand **cand)
3466 unsigned c, d;
3467 unsigned best_cost = INFTY, cost;
3468 struct iv_cand *cnd = NULL, *acnd;
3469 bitmap depends_on = NULL, asol;
3470 bitmap_iterator bi, bi1;
3472 if (data->consider_all_candidates)
3473 asol = sol;
3474 else
3476 asol = BITMAP_XMALLOC ();
3477 bitmap_a_and_b (asol, sol, use->related_cands);
3480 EXECUTE_IF_SET_IN_BITMAP (asol, 0, c, bi)
3482 acnd = iv_cand (data, c);
3483 cost = get_use_iv_cost (data, use, acnd, &depends_on);
3485 if (cost == INFTY)
3486 continue;
3487 if (cost > best_cost)
3488 continue;
3489 if (cost == best_cost)
3491 /* Prefer the cheaper iv. */
3492 if (acnd->cost >= cnd->cost)
3493 continue;
3496 if (depends_on)
3498 EXECUTE_IF_AND_COMPL_IN_BITMAP (depends_on, inv, 0, d, bi1)
3500 goto next_cand;
3502 if (used_inv)
3503 bitmap_a_or_b (used_inv, used_inv, depends_on);
3506 cnd = acnd;
3507 best_cost = cost;
3509 next_cand: ;
3512 if (cnd && used_ivs)
3513 bitmap_set_bit (used_ivs, cnd->id);
3515 if (cand)
3516 *cand = cnd;
3518 if (!data->consider_all_candidates)
3519 BITMAP_XFREE (asol);
3521 return best_cost;
3524 /* Returns cost of set of ivs SOL + invariants INV. Removes unnecessary
3525 induction variable candidates and invariants from the sets. Only
3526 uses 0 .. MAX_USE - 1 are taken into account. */
3528 static unsigned
3529 set_cost_up_to (struct ivopts_data *data, bitmap sol, bitmap inv,
3530 unsigned max_use)
3532 unsigned i;
3533 unsigned cost = 0, size = 0, acost;
3534 struct iv_use *use;
3535 struct iv_cand *cand;
3536 bitmap used_ivs = BITMAP_XMALLOC (), used_inv = BITMAP_XMALLOC ();
3537 bitmap_iterator bi;
3539 for (i = 0; i < max_use; i++)
3541 use = iv_use (data, i);
3542 acost = find_best_candidate (data, use, sol, inv,
3543 used_ivs, used_inv, NULL);
3544 if (acost == INFTY)
3546 BITMAP_XFREE (used_ivs);
3547 BITMAP_XFREE (used_inv);
3548 return INFTY;
3550 cost += acost;
3553 EXECUTE_IF_SET_IN_BITMAP (used_ivs, 0, i, bi)
3555 cand = iv_cand (data, i);
3557 /* Do not count the pseudocandidates. */
3558 if (cand->iv)
3559 size++;
3561 cost += cand->cost;
3563 EXECUTE_IF_SET_IN_BITMAP (used_inv, 0, i, bi)
3565 size++;
3567 cost += ivopts_global_cost_for_size (data, size);
3569 bitmap_copy (sol, used_ivs);
3570 bitmap_copy (inv, used_inv);
3572 BITMAP_XFREE (used_ivs);
3573 BITMAP_XFREE (used_inv);
3575 return cost;
3578 /* Computes cost of set of ivs SOL + invariants INV. Removes unnecessary
3579 induction variable candidates and invariants from the sets. */
3581 static unsigned
3582 set_cost (struct ivopts_data *data, bitmap sol, bitmap inv)
3584 return set_cost_up_to (data, sol, inv, n_iv_uses (data));
3587 /* Tries to extend the sets IVS and INV in the best possible way in order
3588 to express the USE. */
3590 static bool
3591 try_add_cand_for (struct ivopts_data *data, bitmap ivs, bitmap inv,
3592 struct iv_use *use)
3594 unsigned best_cost = set_cost_up_to (data, ivs, inv, use->id + 1), act_cost;
3595 bitmap best_ivs = BITMAP_XMALLOC ();
3596 bitmap best_inv = BITMAP_XMALLOC ();
3597 bitmap act_ivs = BITMAP_XMALLOC ();
3598 bitmap act_inv = BITMAP_XMALLOC ();
3599 unsigned i;
3600 struct cost_pair *cp;
3602 bitmap_copy (best_ivs, ivs);
3603 bitmap_copy (best_inv, inv);
3605 for (i = 0; i < use->n_map_members; i++)
3607 cp = use->cost_map + i;
3608 if (cp->cost == INFTY)
3609 continue;
3611 bitmap_copy (act_ivs, ivs);
3612 bitmap_set_bit (act_ivs, cp->cand->id);
3613 if (cp->depends_on)
3614 bitmap_a_or_b (act_inv, inv, cp->depends_on);
3615 else
3616 bitmap_copy (act_inv, inv);
3617 act_cost = set_cost_up_to (data, act_ivs, act_inv, use->id + 1);
3619 if (act_cost < best_cost)
3621 best_cost = act_cost;
3622 bitmap_copy (best_ivs, act_ivs);
3623 bitmap_copy (best_inv, act_inv);
3627 bitmap_copy (ivs, best_ivs);
3628 bitmap_copy (inv, best_inv);
3630 BITMAP_XFREE (best_ivs);
3631 BITMAP_XFREE (best_inv);
3632 BITMAP_XFREE (act_ivs);
3633 BITMAP_XFREE (act_inv);
3635 return (best_cost != INFTY);
3638 /* Finds an initial set of IVS and invariants INV. We do this by simply
3639 choosing the best candidate for each use. */
3641 static unsigned
3642 get_initial_solution (struct ivopts_data *data, bitmap ivs, bitmap inv)
3644 unsigned i;
3646 for (i = 0; i < n_iv_uses (data); i++)
3647 if (!try_add_cand_for (data, ivs, inv, iv_use (data, i)))
3648 return INFTY;
3650 return set_cost (data, ivs, inv);
3653 /* Tries to improve set of induction variables IVS and invariants INV to get
3654 it better than COST. */
3656 static bool
3657 try_improve_iv_set (struct ivopts_data *data,
3658 bitmap ivs, bitmap inv, unsigned *cost)
3660 unsigned i, acost;
3661 bitmap new_ivs = BITMAP_XMALLOC (), new_inv = BITMAP_XMALLOC ();
3662 bitmap best_new_ivs = NULL, best_new_inv = NULL;
3664 /* Try altering the set of induction variables by one. */
3665 for (i = 0; i < n_iv_cands (data); i++)
3667 bitmap_copy (new_ivs, ivs);
3668 bitmap_copy (new_inv, inv);
3670 if (bitmap_bit_p (ivs, i))
3671 bitmap_clear_bit (new_ivs, i);
3672 else
3673 bitmap_set_bit (new_ivs, i);
3675 acost = set_cost (data, new_ivs, new_inv);
3676 if (acost >= *cost)
3677 continue;
3679 if (!best_new_ivs)
3681 best_new_ivs = BITMAP_XMALLOC ();
3682 best_new_inv = BITMAP_XMALLOC ();
3685 *cost = acost;
3686 bitmap_copy (best_new_ivs, new_ivs);
3687 bitmap_copy (best_new_inv, new_inv);
3690 /* Ditto for invariants. */
3691 for (i = 1; i <= data->max_inv_id; i++)
3693 if (ver_info (data, i)->has_nonlin_use)
3694 continue;
3696 bitmap_copy (new_ivs, ivs);
3697 bitmap_copy (new_inv, inv);
3699 if (bitmap_bit_p (inv, i))
3700 bitmap_clear_bit (new_inv, i);
3701 else
3702 bitmap_set_bit (new_inv, i);
3704 acost = set_cost (data, new_ivs, new_inv);
3705 if (acost >= *cost)
3706 continue;
3708 if (!best_new_ivs)
3710 best_new_ivs = BITMAP_XMALLOC ();
3711 best_new_inv = BITMAP_XMALLOC ();
3714 *cost = acost;
3715 bitmap_copy (best_new_ivs, new_ivs);
3716 bitmap_copy (best_new_inv, new_inv);
3719 BITMAP_XFREE (new_ivs);
3720 BITMAP_XFREE (new_inv);
3722 if (!best_new_ivs)
3723 return false;
3725 bitmap_copy (ivs, best_new_ivs);
3726 bitmap_copy (inv, best_new_inv);
3727 BITMAP_XFREE (best_new_ivs);
3728 BITMAP_XFREE (best_new_inv);
3729 return true;
3732 /* Attempts to find the optimal set of induction variables. We do simple
3733 greedy heuristic -- we try to replace at most one candidate in the selected
3734 solution and remove the unused ivs while this improves the cost. */
3736 static bitmap
3737 find_optimal_iv_set (struct ivopts_data *data)
3739 unsigned cost, i;
3740 bitmap set = BITMAP_XMALLOC ();
3741 bitmap inv = BITMAP_XMALLOC ();
3742 struct iv_use *use;
3744 /* Set the upper bound. */
3745 cost = get_initial_solution (data, set, inv);
3746 if (cost == INFTY)
3748 if (dump_file && (dump_flags & TDF_DETAILS))
3749 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
3750 BITMAP_XFREE (inv);
3751 BITMAP_XFREE (set);
3752 return NULL;
3755 if (dump_file && (dump_flags & TDF_DETAILS))
3757 fprintf (dump_file, "Initial set of candidates (cost %d): ", cost);
3758 bitmap_print (dump_file, set, "", "");
3759 fprintf (dump_file, " invariants ");
3760 bitmap_print (dump_file, inv, "", "");
3761 fprintf (dump_file, "\n");
3764 while (try_improve_iv_set (data, set, inv, &cost))
3766 if (dump_file && (dump_flags & TDF_DETAILS))
3768 fprintf (dump_file, "Improved to (cost %d): ", cost);
3769 bitmap_print (dump_file, set, "", "");
3770 fprintf (dump_file, " invariants ");
3771 bitmap_print (dump_file, inv, "", "");
3772 fprintf (dump_file, "\n");
3776 if (dump_file && (dump_flags & TDF_DETAILS))
3777 fprintf (dump_file, "Final cost %d\n\n", cost);
3779 for (i = 0; i < n_iv_uses (data); i++)
3781 use = iv_use (data, i);
3782 find_best_candidate (data, use, set, inv, NULL, NULL, &use->selected);
3785 BITMAP_XFREE (inv);
3787 return set;
3790 /* Creates a new induction variable corresponding to CAND. */
3792 static void
3793 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
3795 block_stmt_iterator incr_pos;
3796 tree base;
3797 bool after = false;
3799 if (!cand->iv)
3800 return;
3802 switch (cand->pos)
3804 case IP_NORMAL:
3805 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
3806 break;
3808 case IP_END:
3809 incr_pos = bsi_last (ip_end_pos (data->current_loop));
3810 after = true;
3811 break;
3813 case IP_ORIGINAL:
3814 /* Mark that the iv is preserved. */
3815 name_info (data, cand->var_before)->preserve_biv = true;
3816 name_info (data, cand->var_after)->preserve_biv = true;
3818 /* Rewrite the increment so that it uses var_before directly. */
3819 find_interesting_uses_op (data, cand->var_after)->selected = cand;
3821 return;
3824 gimple_add_tmp_var (cand->var_before);
3825 add_referenced_tmp_var (cand->var_before);
3827 base = unshare_expr (cand->iv->base);
3829 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
3830 &incr_pos, after, &cand->var_before, &cand->var_after);
3833 /* Creates new induction variables described in SET. */
3835 static void
3836 create_new_ivs (struct ivopts_data *data, bitmap set)
3838 unsigned i;
3839 struct iv_cand *cand;
3840 bitmap_iterator bi;
3842 EXECUTE_IF_SET_IN_BITMAP (set, 0, i, bi)
3844 cand = iv_cand (data, i);
3845 create_new_iv (data, cand);
3849 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
3850 is true, remove also the ssa name defined by the statement. */
3852 static void
3853 remove_statement (tree stmt, bool including_defined_name)
3855 if (TREE_CODE (stmt) == PHI_NODE)
3857 if (!including_defined_name)
3859 /* Prevent the ssa name defined by the statement from being removed. */
3860 SET_PHI_RESULT (stmt, NULL);
3862 remove_phi_node (stmt, NULL_TREE, bb_for_stmt (stmt));
3864 else
3866 block_stmt_iterator bsi = stmt_for_bsi (stmt);
3868 bsi_remove (&bsi);
3872 /* Rewrites USE (definition of iv used in a nonlinear expression)
3873 using candidate CAND. */
3875 static void
3876 rewrite_use_nonlinear_expr (struct ivopts_data *data,
3877 struct iv_use *use, struct iv_cand *cand)
3879 tree comp = unshare_expr (get_computation (data->current_loop,
3880 use, cand));
3881 tree op, stmts, tgt, ass;
3882 block_stmt_iterator bsi, pbsi;
3884 switch (TREE_CODE (use->stmt))
3886 case PHI_NODE:
3887 tgt = PHI_RESULT (use->stmt);
3889 /* If we should keep the biv, do not replace it. */
3890 if (name_info (data, tgt)->preserve_biv)
3891 return;
3893 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
3894 while (!bsi_end_p (pbsi)
3895 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
3897 bsi = pbsi;
3898 bsi_next (&pbsi);
3900 break;
3902 case MODIFY_EXPR:
3903 tgt = TREE_OPERAND (use->stmt, 0);
3904 bsi = stmt_for_bsi (use->stmt);
3905 break;
3907 default:
3908 gcc_unreachable ();
3911 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
3913 if (TREE_CODE (use->stmt) == PHI_NODE)
3915 if (stmts)
3916 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
3917 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
3918 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
3919 remove_statement (use->stmt, false);
3920 SSA_NAME_DEF_STMT (tgt) = ass;
3922 else
3924 if (stmts)
3925 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3926 TREE_OPERAND (use->stmt, 1) = op;
3930 /* Replaces ssa name in index IDX by its basic variable. Callback for
3931 for_each_index. */
3933 static bool
3934 idx_remove_ssa_names (tree base, tree *idx,
3935 void *data ATTRIBUTE_UNUSED)
3937 tree *op;
3939 if (TREE_CODE (*idx) == SSA_NAME)
3940 *idx = SSA_NAME_VAR (*idx);
3942 if (TREE_CODE (base) == ARRAY_REF)
3944 op = &TREE_OPERAND (base, 2);
3945 if (*op
3946 && TREE_CODE (*op) == SSA_NAME)
3947 *op = SSA_NAME_VAR (*op);
3948 op = &TREE_OPERAND (base, 3);
3949 if (*op
3950 && TREE_CODE (*op) == SSA_NAME)
3951 *op = SSA_NAME_VAR (*op);
3954 return true;
3957 /* Unshares REF and replaces ssa names inside it by their basic variables. */
3959 static tree
3960 unshare_and_remove_ssa_names (tree ref)
3962 ref = unshare_expr (ref);
3963 for_each_index (&ref, idx_remove_ssa_names, NULL);
3965 return ref;
3968 /* Rewrites base of memory access OP with expression WITH in statement
3969 pointed to by BSI. */
3971 static void
3972 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
3974 tree bvar, var, new_var, new_name, copy, name;
3975 tree orig;
3977 var = bvar = get_base_address (*op);
3979 if (!var || TREE_CODE (with) != SSA_NAME)
3980 goto do_rewrite;
3982 gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
3983 gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
3984 if (TREE_CODE (var) == INDIRECT_REF)
3985 var = TREE_OPERAND (var, 0);
3986 if (TREE_CODE (var) == SSA_NAME)
3988 name = var;
3989 var = SSA_NAME_VAR (var);
3991 else if (DECL_P (var))
3992 name = NULL_TREE;
3993 else
3994 goto do_rewrite;
3996 if (var_ann (var)->type_mem_tag)
3997 var = var_ann (var)->type_mem_tag;
3999 /* We need to add a memory tag for the variable. But we do not want
4000 to add it to the temporary used for the computations, since this leads
4001 to problems in redundancy elimination when there are common parts
4002 in two computations referring to the different arrays. So we copy
4003 the variable to a new temporary. */
4004 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
4005 if (name)
4006 new_name = duplicate_ssa_name (name, copy);
4007 else
4009 new_var = create_tmp_var (TREE_TYPE (with), "ruatmp");
4010 add_referenced_tmp_var (new_var);
4011 var_ann (new_var)->type_mem_tag = var;
4012 new_name = make_ssa_name (new_var, copy);
4014 TREE_OPERAND (copy, 0) = new_name;
4015 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
4016 with = new_name;
4018 do_rewrite:
4020 orig = NULL_TREE;
4021 gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
4022 gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
4024 if (TREE_CODE (*op) == INDIRECT_REF)
4025 orig = REF_ORIGINAL (*op);
4026 if (!orig)
4027 orig = unshare_and_remove_ssa_names (*op);
4029 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
4031 /* Record the original reference, for purposes of alias analysis. */
4032 REF_ORIGINAL (*op) = orig;
4035 /* Rewrites USE (address that is an iv) using candidate CAND. */
4037 static void
4038 rewrite_use_address (struct ivopts_data *data,
4039 struct iv_use *use, struct iv_cand *cand)
4041 tree comp = unshare_expr (get_computation (data->current_loop,
4042 use, cand));
4043 block_stmt_iterator bsi = stmt_for_bsi (use->stmt);
4044 tree stmts;
4045 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
4047 if (stmts)
4048 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4050 rewrite_address_base (&bsi, use->op_p, op);
4053 /* Rewrites USE (the condition such that one of the arguments is an iv) using
4054 candidate CAND. */
4056 static void
4057 rewrite_use_compare (struct ivopts_data *data,
4058 struct iv_use *use, struct iv_cand *cand)
4060 tree comp;
4061 tree *op_p, cond, op, stmts, bound;
4062 block_stmt_iterator bsi = stmt_for_bsi (use->stmt);
4063 enum tree_code compare;
4065 if (may_eliminate_iv (data->current_loop,
4066 use, cand, &compare, &bound))
4068 op = force_gimple_operand (unshare_expr (bound), &stmts,
4069 true, NULL_TREE);
4071 if (stmts)
4072 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4074 *use->op_p = build2 (compare, boolean_type_node,
4075 var_at_stmt (data->current_loop,
4076 cand, use->stmt), op);
4077 modify_stmt (use->stmt);
4078 return;
4081 /* The induction variable elimination failed; just express the original
4082 giv. */
4083 comp = unshare_expr (get_computation (data->current_loop, use, cand));
4085 cond = *use->op_p;
4086 op_p = &TREE_OPERAND (cond, 0);
4087 if (TREE_CODE (*op_p) != SSA_NAME
4088 || zero_p (get_iv (data, *op_p)->step))
4089 op_p = &TREE_OPERAND (cond, 1);
4091 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
4092 if (stmts)
4093 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4095 *op_p = op;
4098 /* Ensure that operand *OP_P may be used at the end of EXIT without
4099 violating loop closed ssa form. */
4101 static void
4102 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
4104 basic_block def_bb;
4105 struct loop *def_loop;
4106 tree phi, use;
4108 use = USE_FROM_PTR (op_p);
4109 if (TREE_CODE (use) != SSA_NAME)
4110 return;
4112 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
4113 if (!def_bb)
4114 return;
4116 def_loop = def_bb->loop_father;
4117 if (flow_bb_inside_loop_p (def_loop, exit->dest))
4118 return;
4120 /* Try finding a phi node that copies the value out of the loop. */
4121 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
4122 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
4123 break;
4125 if (!phi)
4127 /* Create such a phi node. */
4128 tree new_name = duplicate_ssa_name (use, NULL);
4130 phi = create_phi_node (new_name, exit->dest);
4131 SSA_NAME_DEF_STMT (new_name) = phi;
4132 add_phi_arg (&phi, use, exit);
4135 SET_USE (op_p, PHI_RESULT (phi));
4138 /* Ensure that operands of STMT may be used at the end of EXIT without
4139 violating loop closed ssa form. */
4141 static void
4142 protect_loop_closed_ssa_form (edge exit, tree stmt)
4144 use_optype uses;
4145 vuse_optype vuses;
4146 v_may_def_optype v_may_defs;
4147 unsigned i;
4149 get_stmt_operands (stmt);
4151 uses = STMT_USE_OPS (stmt);
4152 for (i = 0; i < NUM_USES (uses); i++)
4153 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4155 vuses = STMT_VUSE_OPS (stmt);
4156 for (i = 0; i < NUM_VUSES (vuses); i++)
4157 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
4159 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4160 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
4161 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
4164 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
4165 so that they are emitted on the correct place, and so that the loop closed
4166 ssa form is preserved. */
4168 static void
4169 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
4171 tree_stmt_iterator tsi;
4172 block_stmt_iterator bsi;
4173 tree phi, stmt, def, next;
4175 if (EDGE_COUNT (exit->dest->preds) > 1)
4176 split_loop_exit_edge (exit);
4178 if (TREE_CODE (stmts) == STATEMENT_LIST)
4180 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
4181 protect_loop_closed_ssa_form (exit, tsi_stmt (tsi));
4183 else
4184 protect_loop_closed_ssa_form (exit, stmts);
4186 /* Ensure there is label in exit->dest, so that we can
4187 insert after it. */
4188 tree_block_label (exit->dest);
4189 bsi = bsi_after_labels (exit->dest);
4190 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4192 if (!op)
4193 return;
4195 for (phi = phi_nodes (exit->dest); phi; phi = next)
4197 next = TREE_CHAIN (phi);
4199 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
4201 def = PHI_RESULT (phi);
4202 remove_statement (phi, false);
4203 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
4204 def, op);
4205 SSA_NAME_DEF_STMT (def) = stmt;
4206 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
4211 /* Rewrites the final value of USE (that is only needed outside of the loop)
4212 using candidate CAND. */
4214 static void
4215 rewrite_use_outer (struct ivopts_data *data,
4216 struct iv_use *use, struct iv_cand *cand)
4218 edge exit;
4219 tree value, op, stmts, tgt;
4220 tree phi;
4222 switch (TREE_CODE (use->stmt))
4224 case PHI_NODE:
4225 tgt = PHI_RESULT (use->stmt);
4226 break;
4227 case MODIFY_EXPR:
4228 tgt = TREE_OPERAND (use->stmt, 0);
4229 break;
4230 default:
4231 gcc_unreachable ();
4234 exit = single_dom_exit (data->current_loop);
4236 if (exit)
4238 if (!cand->iv)
4240 bool ok = may_replace_final_value (data->current_loop, use, &value);
4241 gcc_assert (ok);
4243 else
4244 value = get_computation_at (data->current_loop,
4245 use, cand, last_stmt (exit->src));
4247 value = unshare_expr (value);
4248 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
4250 /* If we will preserve the iv anyway and we would need to perform
4251 some computation to replace the final value, do nothing. */
4252 if (stmts && name_info (data, tgt)->preserve_biv)
4253 return;
4255 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
4257 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
4259 if (USE_FROM_PTR (use_p) == tgt)
4260 SET_USE (use_p, op);
4263 if (stmts)
4264 compute_phi_arg_on_exit (exit, stmts, op);
4266 /* Enable removal of the statement. We cannot remove it directly,
4267 since we may still need the aliasing information attached to the
4268 ssa name defined by it. */
4269 name_info (data, tgt)->iv->have_use_for = false;
4270 return;
4273 /* If the variable is going to be preserved anyway, there is nothing to
4274 do. */
4275 if (name_info (data, tgt)->preserve_biv)
4276 return;
4278 /* Otherwise we just need to compute the iv. */
4279 rewrite_use_nonlinear_expr (data, use, cand);
4282 /* Rewrites USE using candidate CAND. */
4284 static void
4285 rewrite_use (struct ivopts_data *data,
4286 struct iv_use *use, struct iv_cand *cand)
4288 switch (use->type)
4290 case USE_NONLINEAR_EXPR:
4291 rewrite_use_nonlinear_expr (data, use, cand);
4292 break;
4294 case USE_OUTER:
4295 rewrite_use_outer (data, use, cand);
4296 break;
4298 case USE_ADDRESS:
4299 rewrite_use_address (data, use, cand);
4300 break;
4302 case USE_COMPARE:
4303 rewrite_use_compare (data, use, cand);
4304 break;
4306 default:
4307 gcc_unreachable ();
4309 modify_stmt (use->stmt);
4312 /* Rewrite the uses using the selected induction variables. */
4314 static void
4315 rewrite_uses (struct ivopts_data *data)
4317 unsigned i;
4318 struct iv_cand *cand;
4319 struct iv_use *use;
4321 for (i = 0; i < n_iv_uses (data); i++)
4323 use = iv_use (data, i);
4324 cand = use->selected;
4325 gcc_assert (cand);
4327 rewrite_use (data, use, cand);
4331 /* Removes the ivs that are not used after rewriting. */
4333 static void
4334 remove_unused_ivs (struct ivopts_data *data)
4336 unsigned j;
4337 bitmap_iterator bi;
4339 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4341 struct version_info *info;
4343 info = ver_info (data, j);
4344 if (info->iv
4345 && !zero_p (info->iv->step)
4346 && !info->inv_id
4347 && !info->iv->have_use_for
4348 && !info->preserve_biv)
4349 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
4353 /* Frees data allocated by the optimization of a single loop. */
4355 static void
4356 free_loop_data (struct ivopts_data *data)
4358 unsigned i, j;
4359 bitmap_iterator bi;
4361 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
4363 struct version_info *info;
4365 info = ver_info (data, i);
4366 if (info->iv)
4367 free (info->iv);
4368 info->iv = NULL;
4369 info->has_nonlin_use = false;
4370 info->preserve_biv = false;
4371 info->inv_id = 0;
4373 bitmap_clear (data->relevant);
4375 for (i = 0; i < n_iv_uses (data); i++)
4377 struct iv_use *use = iv_use (data, i);
4379 free (use->iv);
4380 BITMAP_XFREE (use->related_cands);
4381 for (j = 0; j < use->n_map_members; j++)
4382 if (use->cost_map[j].depends_on)
4383 BITMAP_XFREE (use->cost_map[j].depends_on);
4384 free (use->cost_map);
4385 free (use);
4387 VARRAY_POP_ALL (data->iv_uses);
4389 for (i = 0; i < n_iv_cands (data); i++)
4391 struct iv_cand *cand = iv_cand (data, i);
4393 if (cand->iv)
4394 free (cand->iv);
4395 free (cand);
4397 VARRAY_POP_ALL (data->iv_candidates);
4399 if (data->version_info_size < num_ssa_names)
4401 data->version_info_size = 2 * num_ssa_names;
4402 free (data->version_info);
4403 data->version_info = xcalloc (data->version_info_size,
4404 sizeof (struct version_info));
4407 data->max_inv_id = 0;
4409 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
4411 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
4413 SET_DECL_RTL (obj, NULL_RTX);
4415 VARRAY_POP_ALL (decl_rtl_to_reset);
4418 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
4419 loop tree. */
4421 static void
4422 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
4424 unsigned i;
4426 for (i = 1; i < loops->num; i++)
4427 if (loops->parray[i])
4429 free (loops->parray[i]->aux);
4430 loops->parray[i]->aux = NULL;
4433 free_loop_data (data);
4434 free (data->version_info);
4435 BITMAP_XFREE (data->relevant);
4437 VARRAY_FREE (decl_rtl_to_reset);
4438 VARRAY_FREE (data->iv_uses);
4439 VARRAY_FREE (data->iv_candidates);
4442 /* Optimizes the LOOP. Returns true if anything changed. */
4444 static bool
4445 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
4447 bool changed = false;
4448 bitmap iv_set;
4449 edge exit;
4451 data->current_loop = loop;
4453 if (dump_file && (dump_flags & TDF_DETAILS))
4455 fprintf (dump_file, "Processing loop %d\n", loop->num);
4457 exit = single_dom_exit (loop);
4458 if (exit)
4460 fprintf (dump_file, " single exit %d -> %d, exit condition ",
4461 exit->src->index, exit->dest->index);
4462 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
4463 fprintf (dump_file, "\n");
4466 fprintf (dump_file, "\n");
4469 /* For each ssa name determines whether it behaves as an induction variable
4470 in some loop. */
4471 if (!find_induction_variables (data))
4472 goto finish;
4474 /* Finds interesting uses (item 1). */
4475 find_interesting_uses (data);
4476 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
4477 goto finish;
4479 /* Finds candidates for the induction variables (item 2). */
4480 find_iv_candidates (data);
4482 /* Calculates the costs (item 3, part 1). */
4483 determine_use_iv_costs (data);
4484 determine_iv_costs (data);
4485 determine_set_costs (data);
4487 /* Find the optimal set of induction variables (item 3, part 2). */
4488 iv_set = find_optimal_iv_set (data);
4489 if (!iv_set)
4490 goto finish;
4491 changed = true;
4493 /* Create the new induction variables (item 4, part 1). */
4494 create_new_ivs (data, iv_set);
4496 /* Rewrite the uses (item 4, part 2). */
4497 rewrite_uses (data);
4499 /* Remove the ivs that are unused after rewriting. */
4500 remove_unused_ivs (data);
4502 loop_commit_inserts ();
4504 BITMAP_XFREE (iv_set);
4506 /* We have changed the structure of induction variables; it might happen
4507 that definitions in the scev database refer to some of them that were
4508 eliminated. */
4509 scev_reset ();
4511 finish:
4512 free_loop_data (data);
4514 return changed;
4517 /* Main entry point. Optimizes induction variables in LOOPS. */
4519 void
4520 tree_ssa_iv_optimize (struct loops *loops)
4522 struct loop *loop;
4523 struct ivopts_data data;
4525 tree_ssa_iv_optimize_init (loops, &data);
4527 /* Optimize the loops starting with the innermost ones. */
4528 loop = loops->tree_root;
4529 while (loop->inner)
4530 loop = loop->inner;
4532 #ifdef ENABLE_CHECKING
4533 verify_loop_closed_ssa ();
4534 verify_stmts ();
4535 #endif
4537 /* Scan the loops, inner ones first. */
4538 while (loop != loops->tree_root)
4540 if (dump_file && (dump_flags & TDF_DETAILS))
4541 flow_loop_dump (loop, dump_file, NULL, 1);
4542 if (tree_ssa_iv_optimize_loop (&data, loop))
4544 #ifdef ENABLE_CHECKING
4545 verify_loop_closed_ssa ();
4546 verify_stmts ();
4547 #endif
4550 if (loop->next)
4552 loop = loop->next;
4553 while (loop->inner)
4554 loop = loop->inner;
4556 else
4557 loop = loop->outer;
4560 tree_ssa_iv_optimize_finalize (loops, &data);