* Merge with edge-vector-mergepoint-20040918.
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blob6bc9ae872ed5800fac5466b4d46ca226f7980ff2
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
92 /* The infinite cost. */
93 #define INFTY 10000000
95 /* The expected number of loop iterations. TODO -- use profiling instead of
96 this. */
97 #define AVG_LOOP_NITER(LOOP) 5
99 /* Just to shorten the ugly names. */
100 #define EXEC_BINARY nondestructive_fold_binary_to_constant
101 #define EXEC_UNARY nondestructive_fold_unary_to_constant
103 /* Representation of the induction variable. */
104 struct iv
106 tree base; /* Initial value of the iv. */
107 tree step; /* Step of the iv (constant only). */
108 tree ssa_name; /* The ssa name with the value. */
109 bool biv_p; /* Is it a biv? */
110 bool have_use_for; /* Do we already have a use for it? */
111 unsigned use_id; /* The identifier in the use if it is the case. */
114 /* Per-ssa version information (induction variable descriptions, etc.). */
115 struct version_info
117 tree name; /* The ssa name. */
118 struct iv *iv; /* Induction variable description. */
119 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
120 an expression that is not an induction variable. */
121 unsigned inv_id; /* Id of an invariant. */
122 bool preserve_biv; /* For the original biv, whether to preserve it. */
125 /* Information attached to loop. */
126 struct loop_data
128 struct tree_niter_desc niter;
129 /* Number of iterations. */
131 unsigned regs_used; /* Number of registers used. */
134 /* Types of uses. */
135 enum use_type
137 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
138 USE_OUTER, /* The induction variable is used outside the loop. */
139 USE_ADDRESS, /* Use in an address. */
140 USE_COMPARE /* Use is a compare. */
143 /* The candidate - cost pair. */
144 struct cost_pair
146 struct iv_cand *cand; /* The candidate. */
147 unsigned cost; /* The cost. */
148 bitmap depends_on; /* The list of invariants that have to be
149 preserved. */
152 /* Use. */
153 struct iv_use
155 unsigned id; /* The id of the use. */
156 enum use_type type; /* Type of the use. */
157 struct iv *iv; /* The induction variable it is based on. */
158 tree stmt; /* Statement in that it occurs. */
159 tree *op_p; /* The place where it occurs. */
160 bitmap related_cands; /* The set of "related" iv candidates. */
162 unsigned n_map_members; /* Number of candidates in the cost_map list. */
163 struct cost_pair *cost_map;
164 /* The costs wrto the iv candidates. */
166 struct iv_cand *selected;
167 /* The selected candidate. */
170 /* The position where the iv is computed. */
171 enum iv_position
173 IP_NORMAL, /* At the end, just before the exit condition. */
174 IP_END, /* At the end of the latch block. */
175 IP_ORIGINAL /* The original biv. */
178 /* The induction variable candidate. */
179 struct iv_cand
181 unsigned id; /* The number of the candidate. */
182 bool important; /* Whether this is an "important" candidate, i.e. such
183 that it should be considered by all uses. */
184 enum iv_position pos; /* Where it is computed. */
185 tree incremented_at; /* For original biv, the statement where it is
186 incremented. */
187 tree var_before; /* The variable used for it before increment. */
188 tree var_after; /* The variable used for it after increment. */
189 struct iv *iv; /* The value of the candidate. NULL for
190 "pseudocandidate" used to indicate the possibility
191 to replace the final value of an iv by direct
192 computation of the value. */
193 unsigned cost; /* Cost of the candidate. */
196 /* The data used by the induction variable optimizations. */
198 struct ivopts_data
200 /* The currently optimized loop. */
201 struct loop *current_loop;
203 /* The size of version_info array allocated. */
204 unsigned version_info_size;
206 /* The array of information for the ssa names. */
207 struct version_info *version_info;
209 /* The bitmap of indices in version_info whose value was changed. */
210 bitmap relevant;
212 /* The maximum invariant id. */
213 unsigned max_inv_id;
215 /* The uses of induction variables. */
216 varray_type iv_uses;
218 /* The candidates. */
219 varray_type iv_candidates;
221 /* Whether to consider just related and important candidates when replacing a
222 use. */
223 bool consider_all_candidates;
226 /* Bound on number of candidates below that all candidates are considered. */
228 #define CONSIDER_ALL_CANDIDATES_BOUND \
229 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
231 /* If there are more iv occurrences, we just give up (it is quite unlikely that
232 optimizing such a loop would help, and it would take ages). */
234 #define MAX_CONSIDERED_USES \
235 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
237 /* The list of trees for that the decl_rtl field must be reset is stored
238 here. */
240 static varray_type decl_rtl_to_reset;
242 /* Number of uses recorded in DATA. */
244 static inline unsigned
245 n_iv_uses (struct ivopts_data *data)
247 return VARRAY_ACTIVE_SIZE (data->iv_uses);
250 /* Ith use recorded in DATA. */
252 static inline struct iv_use *
253 iv_use (struct ivopts_data *data, unsigned i)
255 return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
258 /* Number of candidates recorded in DATA. */
260 static inline unsigned
261 n_iv_cands (struct ivopts_data *data)
263 return VARRAY_ACTIVE_SIZE (data->iv_candidates);
266 /* Ith candidate recorded in DATA. */
268 static inline struct iv_cand *
269 iv_cand (struct ivopts_data *data, unsigned i)
271 return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
274 /* The data for LOOP. */
276 static inline struct loop_data *
277 loop_data (struct loop *loop)
279 return loop->aux;
282 /* The single loop exit if it dominates the latch, NULL otherwise. */
284 static edge
285 single_dom_exit (struct loop *loop)
287 edge exit = loop->single_exit;
289 if (!exit)
290 return NULL;
292 if (!just_once_each_iteration_p (loop, exit->src))
293 return NULL;
295 return exit;
298 /* Dumps information about the induction variable IV to FILE. */
300 extern void dump_iv (FILE *, struct iv *);
301 void
302 dump_iv (FILE *file, struct iv *iv)
304 fprintf (file, "ssa name ");
305 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
306 fprintf (file, "\n");
308 fprintf (file, " type ");
309 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
310 fprintf (file, "\n");
312 if (iv->step)
314 fprintf (file, " base ");
315 print_generic_expr (file, iv->base, TDF_SLIM);
316 fprintf (file, "\n");
318 fprintf (file, " step ");
319 print_generic_expr (file, iv->step, TDF_SLIM);
320 fprintf (file, "\n");
322 else
324 fprintf (file, " invariant ");
325 print_generic_expr (file, iv->base, TDF_SLIM);
326 fprintf (file, "\n");
329 if (iv->biv_p)
330 fprintf (file, " is a biv\n");
333 /* Dumps information about the USE to FILE. */
335 extern void dump_use (FILE *, struct iv_use *);
336 void
337 dump_use (FILE *file, struct iv_use *use)
339 struct iv *iv = use->iv;
341 fprintf (file, "use %d\n", use->id);
343 switch (use->type)
345 case USE_NONLINEAR_EXPR:
346 fprintf (file, " generic\n");
347 break;
349 case USE_OUTER:
350 fprintf (file, " outside\n");
351 break;
353 case USE_ADDRESS:
354 fprintf (file, " address\n");
355 break;
357 case USE_COMPARE:
358 fprintf (file, " compare\n");
359 break;
361 default:
362 gcc_unreachable ();
365 fprintf (file, " in statement ");
366 print_generic_expr (file, use->stmt, TDF_SLIM);
367 fprintf (file, "\n");
369 fprintf (file, " at position ");
370 if (use->op_p)
371 print_generic_expr (file, *use->op_p, TDF_SLIM);
372 fprintf (file, "\n");
374 fprintf (file, " type ");
375 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
376 fprintf (file, "\n");
378 if (iv->step)
380 fprintf (file, " base ");
381 print_generic_expr (file, iv->base, TDF_SLIM);
382 fprintf (file, "\n");
384 fprintf (file, " step ");
385 print_generic_expr (file, iv->step, TDF_SLIM);
386 fprintf (file, "\n");
388 else
390 fprintf (file, " invariant ");
391 print_generic_expr (file, iv->base, TDF_SLIM);
392 fprintf (file, "\n");
395 fprintf (file, " related candidates ");
396 dump_bitmap (file, use->related_cands);
399 /* Dumps information about the uses to FILE. */
401 extern void dump_uses (FILE *, struct ivopts_data *);
402 void
403 dump_uses (FILE *file, struct ivopts_data *data)
405 unsigned i;
406 struct iv_use *use;
408 for (i = 0; i < n_iv_uses (data); i++)
410 use = iv_use (data, i);
412 dump_use (file, use);
413 fprintf (file, "\n");
417 /* Dumps information about induction variable candidate CAND to FILE. */
419 extern void dump_cand (FILE *, struct iv_cand *);
420 void
421 dump_cand (FILE *file, struct iv_cand *cand)
423 struct iv *iv = cand->iv;
425 fprintf (file, "candidate %d%s\n",
426 cand->id, cand->important ? " (important)" : "");
428 if (!iv)
430 fprintf (file, " final value replacement\n");
431 return;
434 switch (cand->pos)
436 case IP_NORMAL:
437 fprintf (file, " incremented before exit test\n");
438 break;
440 case IP_END:
441 fprintf (file, " incremented at end\n");
442 break;
444 case IP_ORIGINAL:
445 fprintf (file, " original biv\n");
446 break;
449 fprintf (file, " type ");
450 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
451 fprintf (file, "\n");
453 if (iv->step)
455 fprintf (file, " base ");
456 print_generic_expr (file, iv->base, TDF_SLIM);
457 fprintf (file, "\n");
459 fprintf (file, " step ");
460 print_generic_expr (file, iv->step, TDF_SLIM);
461 fprintf (file, "\n");
463 else
465 fprintf (file, " invariant ");
466 print_generic_expr (file, iv->base, TDF_SLIM);
467 fprintf (file, "\n");
471 /* Returns the info for ssa version VER. */
473 static inline struct version_info *
474 ver_info (struct ivopts_data *data, unsigned ver)
476 return data->version_info + ver;
479 /* Returns the info for ssa name NAME. */
481 static inline struct version_info *
482 name_info (struct ivopts_data *data, tree name)
484 return ver_info (data, SSA_NAME_VERSION (name));
487 /* Checks whether there exists number X such that X * B = A, counting modulo
488 2^BITS. */
490 static bool
491 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
492 HOST_WIDE_INT *x)
494 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
495 unsigned HOST_WIDE_INT inv, ex, val;
496 unsigned i;
498 a &= mask;
499 b &= mask;
501 /* First divide the whole equation by 2 as long as possible. */
502 while (!(a & 1) && !(b & 1))
504 a >>= 1;
505 b >>= 1;
506 bits--;
507 mask >>= 1;
510 if (!(b & 1))
512 /* If b is still even, a is odd and there is no such x. */
513 return false;
516 /* Find the inverse of b. We compute it as
517 b^(2^(bits - 1) - 1) (mod 2^bits). */
518 inv = 1;
519 ex = b;
520 for (i = 0; i < bits - 1; i++)
522 inv = (inv * ex) & mask;
523 ex = (ex * ex) & mask;
526 val = (a * inv) & mask;
528 gcc_assert (((val * b) & mask) == a);
530 if ((val >> (bits - 1)) & 1)
531 val |= ~mask;
533 *x = val;
535 return true;
538 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
539 emitted in LOOP. */
541 static bool
542 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
544 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
546 gcc_assert (bb);
548 if (sbb == loop->latch)
549 return true;
551 if (sbb != bb)
552 return false;
554 return stmt == last_stmt (bb);
557 /* Returns true if STMT if after the place where the original induction
558 variable CAND is incremented. */
560 static bool
561 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
563 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
564 basic_block stmt_bb = bb_for_stmt (stmt);
565 block_stmt_iterator bsi;
567 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
568 return false;
570 if (stmt_bb != cand_bb)
571 return true;
573 /* Scan the block from the end, since the original ivs are usually
574 incremented at the end of the loop body. */
575 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
577 if (bsi_stmt (bsi) == cand->incremented_at)
578 return false;
579 if (bsi_stmt (bsi) == stmt)
580 return true;
584 /* Returns true if STMT if after the place where the induction variable
585 CAND is incremented in LOOP. */
587 static bool
588 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
590 switch (cand->pos)
592 case IP_END:
593 return false;
595 case IP_NORMAL:
596 return stmt_after_ip_normal_pos (loop, stmt);
598 case IP_ORIGINAL:
599 return stmt_after_ip_original_pos (cand, stmt);
601 default:
602 gcc_unreachable ();
606 /* Initializes data structures used by the iv optimization pass, stored
607 in DATA. LOOPS is the loop tree. */
609 static void
610 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
612 unsigned i;
614 data->version_info_size = 2 * num_ssa_names;
615 data->version_info = xcalloc (data->version_info_size,
616 sizeof (struct version_info));
617 data->relevant = BITMAP_XMALLOC ();
618 data->max_inv_id = 0;
620 for (i = 1; i < loops->num; i++)
621 if (loops->parray[i])
622 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
624 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
625 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
626 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
629 /* Allocates an induction variable with given initial value BASE and step STEP
630 for loop LOOP. */
632 static struct iv *
633 alloc_iv (tree base, tree step)
635 struct iv *iv = xcalloc (1, sizeof (struct iv));
637 if (step && integer_zerop (step))
638 step = NULL_TREE;
640 iv->base = base;
641 iv->step = step;
642 iv->biv_p = false;
643 iv->have_use_for = false;
644 iv->use_id = 0;
645 iv->ssa_name = NULL_TREE;
647 return iv;
650 /* Sets STEP and BASE for induction variable IV. */
652 static void
653 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
655 struct version_info *info = name_info (data, iv);
657 gcc_assert (!info->iv);
659 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
660 info->iv = alloc_iv (base, step);
661 info->iv->ssa_name = iv;
664 /* Finds induction variable declaration for VAR. */
666 static struct iv *
667 get_iv (struct ivopts_data *data, tree var)
669 basic_block bb;
671 if (!name_info (data, var)->iv)
673 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
675 if (!bb
676 || !flow_bb_inside_loop_p (data->current_loop, bb))
677 set_iv (data, var, var, NULL_TREE);
680 return name_info (data, var)->iv;
683 /* Determines the step of a biv defined in PHI. */
685 static tree
686 determine_biv_step (tree phi)
688 struct loop *loop = bb_for_stmt (phi)->loop_father;
689 tree name = PHI_RESULT (phi), base, step;
690 tree type = TREE_TYPE (name);
692 if (!is_gimple_reg (name))
693 return NULL_TREE;
695 if (!simple_iv (loop, phi, name, &base, &step))
696 return NULL_TREE;
698 if (!step)
699 return build_int_cst (type, 0);
701 return step;
704 /* Returns false if INDEX is a ssa name that occurs in an
705 abnormal phi node. Callback for for_each_index. */
707 static bool
708 idx_contains_abnormal_ssa_name_p (tree base ATTRIBUTE_UNUSED, tree *index,
709 void *data ATTRIBUTE_UNUSED)
711 if (TREE_CODE (*index) != SSA_NAME)
712 return true;
714 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*index) == 0;
717 /* Returns true if EXPR contains a ssa name that occurs in an
718 abnormal phi node. */
720 static bool
721 contains_abnormal_ssa_name_p (tree expr)
723 enum tree_code code = TREE_CODE (expr);
724 enum tree_code_class class = TREE_CODE_CLASS (code);
726 if (code == SSA_NAME)
727 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
729 if (code == INTEGER_CST
730 || is_gimple_min_invariant (expr))
731 return false;
733 if (code == ADDR_EXPR)
734 return !for_each_index (&TREE_OPERAND (expr, 1),
735 idx_contains_abnormal_ssa_name_p,
736 NULL);
738 switch (class)
740 case tcc_binary:
741 case tcc_comparison:
742 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
743 return true;
745 /* Fallthru. */
746 case tcc_unary:
747 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
748 return true;
750 break;
752 default:
753 gcc_unreachable ();
756 return false;
759 /* Finds basic ivs. */
761 static bool
762 find_bivs (struct ivopts_data *data)
764 tree phi, step, type, base;
765 bool found = false;
766 struct loop *loop = data->current_loop;
768 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
770 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
771 continue;
773 step = determine_biv_step (phi);
775 if (!step)
776 continue;
777 if (cst_and_fits_in_hwi (step)
778 && int_cst_value (step) == 0)
779 continue;
781 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
782 if (contains_abnormal_ssa_name_p (base))
783 continue;
785 type = TREE_TYPE (PHI_RESULT (phi));
786 base = fold_convert (type, base);
787 step = fold_convert (type, step);
789 /* FIXME: We do not handle induction variables whose step does
790 not satisfy cst_and_fits_in_hwi. */
791 if (!cst_and_fits_in_hwi (step))
792 continue;
794 set_iv (data, PHI_RESULT (phi), base, step);
795 found = true;
798 return found;
801 /* Marks basic ivs. */
803 static void
804 mark_bivs (struct ivopts_data *data)
806 tree phi, var;
807 struct iv *iv, *incr_iv;
808 struct loop *loop = data->current_loop;
809 basic_block incr_bb;
811 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
813 iv = get_iv (data, PHI_RESULT (phi));
814 if (!iv)
815 continue;
817 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
818 incr_iv = get_iv (data, var);
819 if (!incr_iv)
820 continue;
822 /* If the increment is in the subloop, ignore it. */
823 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
824 if (incr_bb->loop_father != data->current_loop
825 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
826 continue;
828 iv->biv_p = true;
829 incr_iv->biv_p = true;
833 /* Checks whether STMT defines a linear induction variable and stores its
834 parameters to BASE and STEP. */
836 static bool
837 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
838 tree *base, tree *step)
840 tree lhs;
841 struct loop *loop = data->current_loop;
843 *base = NULL_TREE;
844 *step = NULL_TREE;
846 if (TREE_CODE (stmt) != MODIFY_EXPR)
847 return false;
849 lhs = TREE_OPERAND (stmt, 0);
850 if (TREE_CODE (lhs) != SSA_NAME)
851 return false;
853 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
854 return false;
856 /* FIXME: We do not handle induction variables whose step does
857 not satisfy cst_and_fits_in_hwi. */
858 if (!zero_p (*step)
859 && !cst_and_fits_in_hwi (*step))
860 return false;
862 if (contains_abnormal_ssa_name_p (*base))
863 return false;
865 return true;
868 /* Finds general ivs in statement STMT. */
870 static void
871 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
873 tree base, step;
875 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
876 return;
878 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
881 /* Finds general ivs in basic block BB. */
883 static void
884 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
886 block_stmt_iterator bsi;
888 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
889 find_givs_in_stmt (data, bsi_stmt (bsi));
892 /* Finds general ivs. */
894 static void
895 find_givs (struct ivopts_data *data)
897 struct loop *loop = data->current_loop;
898 basic_block *body = get_loop_body_in_dom_order (loop);
899 unsigned i;
901 for (i = 0; i < loop->num_nodes; i++)
902 find_givs_in_bb (data, body[i]);
903 free (body);
906 /* Determine the number of iterations of the current loop. */
908 static void
909 determine_number_of_iterations (struct ivopts_data *data)
911 struct loop *loop = data->current_loop;
912 edge exit = single_dom_exit (loop);
914 if (!exit)
915 return;
917 number_of_iterations_exit (loop, exit, &loop_data (loop)->niter);
920 /* For each ssa name defined in LOOP determines whether it is an induction
921 variable and if so, its initial value and step. */
923 static bool
924 find_induction_variables (struct ivopts_data *data)
926 unsigned i;
927 struct loop *loop = data->current_loop;
929 if (!find_bivs (data))
930 return false;
932 find_givs (data);
933 mark_bivs (data);
934 determine_number_of_iterations (data);
936 if (dump_file && (dump_flags & TDF_DETAILS))
938 if (loop_data (loop)->niter.niter)
940 fprintf (dump_file, " number of iterations ");
941 print_generic_expr (dump_file, loop_data (loop)->niter.niter,
942 TDF_SLIM);
943 fprintf (dump_file, "\n");
945 fprintf (dump_file, " may be zero if ");
946 print_generic_expr (dump_file, loop_data (loop)->niter.may_be_zero,
947 TDF_SLIM);
948 fprintf (dump_file, "\n");
950 fprintf (dump_file, " bogus unless ");
951 print_generic_expr (dump_file, loop_data (loop)->niter.assumptions,
952 TDF_SLIM);
953 fprintf (dump_file, "\n");
954 fprintf (dump_file, "\n");
957 fprintf (dump_file, "Induction variables:\n\n");
959 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
961 if (ver_info (data, i)->iv)
962 dump_iv (dump_file, ver_info (data, i)->iv);
966 return true;
969 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
971 static struct iv_use *
972 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
973 tree stmt, enum use_type use_type)
975 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
977 use->id = n_iv_uses (data);
978 use->type = use_type;
979 use->iv = iv;
980 use->stmt = stmt;
981 use->op_p = use_p;
982 use->related_cands = BITMAP_XMALLOC ();
984 if (dump_file && (dump_flags & TDF_DETAILS))
985 dump_use (dump_file, use);
987 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
989 return use;
992 /* Checks whether OP is a loop-level invariant and if so, records it.
993 NONLINEAR_USE is true if the invariant is used in a way we do not
994 handle specially. */
996 static void
997 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
999 basic_block bb;
1000 struct version_info *info;
1002 if (TREE_CODE (op) != SSA_NAME
1003 || !is_gimple_reg (op))
1004 return;
1006 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1007 if (bb
1008 && flow_bb_inside_loop_p (data->current_loop, bb))
1009 return;
1011 info = name_info (data, op);
1012 info->name = op;
1013 info->has_nonlin_use |= nonlinear_use;
1014 if (!info->inv_id)
1015 info->inv_id = ++data->max_inv_id;
1016 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1019 /* Checks whether the use OP is interesting and if so, records it
1020 as TYPE. */
1022 static struct iv_use *
1023 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1024 enum use_type type)
1026 struct iv *iv;
1027 struct iv *civ;
1028 tree stmt;
1029 struct iv_use *use;
1031 if (TREE_CODE (op) != SSA_NAME)
1032 return NULL;
1034 iv = get_iv (data, op);
1035 if (!iv)
1036 return NULL;
1038 if (iv->have_use_for)
1040 use = iv_use (data, iv->use_id);
1042 gcc_assert (use->type == USE_NONLINEAR_EXPR
1043 || use->type == USE_OUTER);
1045 if (type == USE_NONLINEAR_EXPR)
1046 use->type = USE_NONLINEAR_EXPR;
1047 return use;
1050 if (zero_p (iv->step))
1052 record_invariant (data, op, true);
1053 return NULL;
1055 iv->have_use_for = true;
1057 civ = xmalloc (sizeof (struct iv));
1058 *civ = *iv;
1060 stmt = SSA_NAME_DEF_STMT (op);
1061 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1062 || TREE_CODE (stmt) == MODIFY_EXPR);
1064 use = record_use (data, NULL, civ, stmt, type);
1065 iv->use_id = use->id;
1067 return use;
1070 /* Checks whether the use OP is interesting and if so, records it. */
1072 static struct iv_use *
1073 find_interesting_uses_op (struct ivopts_data *data, tree op)
1075 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1078 /* Records a definition of induction variable OP that is used outside of the
1079 loop. */
1081 static struct iv_use *
1082 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1084 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1087 /* Checks whether the condition *COND_P in STMT is interesting
1088 and if so, records it. */
1090 static void
1091 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1093 tree *op0_p;
1094 tree *op1_p;
1095 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1096 struct iv const_iv;
1097 tree zero = integer_zero_node;
1099 const_iv.step = NULL_TREE;
1101 if (integer_zerop (*cond_p)
1102 || integer_nonzerop (*cond_p))
1103 return;
1105 if (TREE_CODE (*cond_p) == SSA_NAME)
1107 op0_p = cond_p;
1108 op1_p = &zero;
1110 else
1112 op0_p = &TREE_OPERAND (*cond_p, 0);
1113 op1_p = &TREE_OPERAND (*cond_p, 1);
1116 if (TREE_CODE (*op0_p) == SSA_NAME)
1117 iv0 = get_iv (data, *op0_p);
1118 else
1119 iv0 = &const_iv;
1121 if (TREE_CODE (*op1_p) == SSA_NAME)
1122 iv1 = get_iv (data, *op1_p);
1123 else
1124 iv1 = &const_iv;
1126 if (/* When comparing with non-invariant value, we may not do any senseful
1127 induction variable elimination. */
1128 (!iv0 || !iv1)
1129 /* Eliminating condition based on two ivs would be nontrivial.
1130 ??? TODO -- it is not really important to handle this case. */
1131 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1133 find_interesting_uses_op (data, *op0_p);
1134 find_interesting_uses_op (data, *op1_p);
1135 return;
1138 if (zero_p (iv0->step) && zero_p (iv1->step))
1140 /* If both are invariants, this is a work for unswitching. */
1141 return;
1144 civ = xmalloc (sizeof (struct iv));
1145 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1146 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1149 /* Cumulates the steps of indices into DATA and replaces their values with the
1150 initial ones. Returns false when the value of the index cannot be determined.
1151 Callback for for_each_index. */
1153 struct ifs_ivopts_data
1155 struct ivopts_data *ivopts_data;
1156 tree stmt;
1157 tree *step_p;
1160 static bool
1161 idx_find_step (tree base, tree *idx, void *data)
1163 struct ifs_ivopts_data *dta = data;
1164 struct iv *iv;
1165 tree step, type, iv_type, iv_step, lbound;
1166 basic_block def_bb;
1167 struct loop *loop = dta->ivopts_data->current_loop;
1169 if (TREE_CODE (*idx) != SSA_NAME)
1170 return true;
1172 iv = get_iv (dta->ivopts_data, *idx);
1173 if (!iv)
1174 return false;
1176 *idx = iv->base;
1178 if (!iv->step)
1179 return true;
1181 iv_type = TREE_TYPE (iv->base);
1182 type = build_pointer_type (TREE_TYPE (base));
1183 if (TREE_CODE (base) == ARRAY_REF)
1185 step = array_ref_element_size (base);
1186 lbound = array_ref_low_bound (base);
1188 /* We only handle addresses whose step is an integer constant. */
1189 if (TREE_CODE (step) != INTEGER_CST)
1190 return false;
1192 /* We need the lower bound to be invariant in loop, since otherwise
1193 we are unable to initialize a new induction variable created
1194 in strength reduction -- we need to take the address of the
1195 reference in front of the loop. */
1196 if (is_gimple_min_invariant (lbound))
1197 ; /* Nothing to do. */
1198 else if (TREE_CODE (lbound) != SSA_NAME)
1199 return false;
1200 else
1202 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (lbound));
1203 if (def_bb
1204 && flow_bb_inside_loop_p (loop, def_bb))
1205 return false;
1208 else
1209 /* The step for pointer arithmetics already is 1 byte. */
1210 step = build_int_cst (type, 1);
1212 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1213 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1214 type, iv->base, iv->step, dta->stmt);
1215 else
1216 iv_step = fold_convert (iv_type, iv->step);
1218 if (!iv_step)
1220 /* The index might wrap. */
1221 return false;
1224 step = EXEC_BINARY (MULT_EXPR, type, step, iv_step);
1226 if (!*dta->step_p)
1227 *dta->step_p = step;
1228 else
1229 *dta->step_p = EXEC_BINARY (PLUS_EXPR, type, *dta->step_p, step);
1231 return true;
1234 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1235 object is passed to it in DATA. */
1237 static bool
1238 idx_record_use (tree base, tree *idx,
1239 void *data)
1241 find_interesting_uses_op (data, *idx);
1242 if (TREE_CODE (base) == ARRAY_REF)
1244 find_interesting_uses_op (data, array_ref_element_size (base));
1245 find_interesting_uses_op (data, array_ref_low_bound (base));
1247 return true;
1250 /* Finds addresses in *OP_P inside STMT. */
1252 static void
1253 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1255 tree base = unshare_expr (*op_p), step = NULL;
1256 struct iv *civ;
1257 struct ifs_ivopts_data ifs_ivopts_data;
1259 /* Ignore bitfields for now. Not really something terribly complicated
1260 to handle. TODO. */
1261 if (TREE_CODE (base) == COMPONENT_REF
1262 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1263 goto fail;
1265 ifs_ivopts_data.ivopts_data = data;
1266 ifs_ivopts_data.stmt = stmt;
1267 ifs_ivopts_data.step_p = &step;
1268 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1269 || zero_p (step))
1270 goto fail;
1272 if (TREE_CODE (base) == INDIRECT_REF)
1273 base = TREE_OPERAND (base, 0);
1274 else
1275 base = build_addr (base);
1277 civ = alloc_iv (base, step);
1278 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1279 return;
1281 fail:
1282 for_each_index (op_p, idx_record_use, data);
1285 /* Finds and records invariants used in STMT. */
1287 static void
1288 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1290 use_optype uses = NULL;
1291 unsigned i, n;
1292 tree op;
1294 if (TREE_CODE (stmt) == PHI_NODE)
1295 n = PHI_NUM_ARGS (stmt);
1296 else
1298 get_stmt_operands (stmt);
1299 uses = STMT_USE_OPS (stmt);
1300 n = NUM_USES (uses);
1303 for (i = 0; i < n; i++)
1305 if (TREE_CODE (stmt) == PHI_NODE)
1306 op = PHI_ARG_DEF (stmt, i);
1307 else
1308 op = USE_OP (uses, i);
1310 record_invariant (data, op, false);
1314 /* Finds interesting uses of induction variables in the statement STMT. */
1316 static void
1317 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1319 struct iv *iv;
1320 tree op, lhs, rhs;
1321 use_optype uses = NULL;
1322 unsigned i, n;
1324 find_invariants_stmt (data, stmt);
1326 if (TREE_CODE (stmt) == COND_EXPR)
1328 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1329 return;
1332 if (TREE_CODE (stmt) == MODIFY_EXPR)
1334 lhs = TREE_OPERAND (stmt, 0);
1335 rhs = TREE_OPERAND (stmt, 1);
1337 if (TREE_CODE (lhs) == SSA_NAME)
1339 /* If the statement defines an induction variable, the uses are not
1340 interesting by themselves. */
1342 iv = get_iv (data, lhs);
1344 if (iv && !zero_p (iv->step))
1345 return;
1348 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1350 case tcc_comparison:
1351 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1352 return;
1354 case tcc_reference:
1355 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1356 if (REFERENCE_CLASS_P (lhs))
1357 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1358 return;
1360 default: ;
1363 if (REFERENCE_CLASS_P (lhs)
1364 && is_gimple_val (rhs))
1366 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1367 find_interesting_uses_op (data, rhs);
1368 return;
1371 /* TODO -- we should also handle address uses of type
1373 memory = call (whatever);
1377 call (memory). */
1380 if (TREE_CODE (stmt) == PHI_NODE
1381 && bb_for_stmt (stmt) == data->current_loop->header)
1383 lhs = PHI_RESULT (stmt);
1384 iv = get_iv (data, lhs);
1386 if (iv && !zero_p (iv->step))
1387 return;
1390 if (TREE_CODE (stmt) == PHI_NODE)
1391 n = PHI_NUM_ARGS (stmt);
1392 else
1394 uses = STMT_USE_OPS (stmt);
1395 n = NUM_USES (uses);
1398 for (i = 0; i < n; i++)
1400 if (TREE_CODE (stmt) == PHI_NODE)
1401 op = PHI_ARG_DEF (stmt, i);
1402 else
1403 op = USE_OP (uses, i);
1405 if (TREE_CODE (op) != SSA_NAME)
1406 continue;
1408 iv = get_iv (data, op);
1409 if (!iv)
1410 continue;
1412 find_interesting_uses_op (data, op);
1416 /* Finds interesting uses of induction variables outside of loops
1417 on loop exit edge EXIT. */
1419 static void
1420 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1422 tree phi, def;
1424 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
1426 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1427 find_interesting_uses_outer (data, def);
1431 /* Finds uses of the induction variables that are interesting. */
1433 static void
1434 find_interesting_uses (struct ivopts_data *data)
1436 basic_block bb;
1437 block_stmt_iterator bsi;
1438 tree phi;
1439 basic_block *body = get_loop_body (data->current_loop);
1440 unsigned i;
1441 struct version_info *info;
1442 edge e;
1444 if (dump_file && (dump_flags & TDF_DETAILS))
1445 fprintf (dump_file, "Uses:\n\n");
1447 for (i = 0; i < data->current_loop->num_nodes; i++)
1449 edge_iterator ei;
1450 bb = body[i];
1452 FOR_EACH_EDGE (e, ei, bb->succs)
1454 if (e->dest != EXIT_BLOCK_PTR
1455 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1456 find_interesting_uses_outside (data, e);
1459 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
1460 find_interesting_uses_stmt (data, phi);
1461 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1462 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1465 if (dump_file && (dump_flags & TDF_DETAILS))
1467 fprintf (dump_file, "\n");
1469 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
1471 info = ver_info (data, i);
1472 if (info->inv_id)
1474 fprintf (dump_file, " ");
1475 print_generic_expr (dump_file, info->name, TDF_SLIM);
1476 fprintf (dump_file, " is invariant (%d)%s\n",
1477 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1481 fprintf (dump_file, "\n");
1484 free (body);
1487 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1488 position to POS. If USE is not NULL, the candidate is set as related to
1489 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1490 replacement of the final value of the iv by a direct computation. */
1492 static struct iv_cand *
1493 add_candidate_1 (struct ivopts_data *data,
1494 tree base, tree step, bool important, enum iv_position pos,
1495 struct iv_use *use, tree incremented_at)
1497 unsigned i;
1498 struct iv_cand *cand = NULL;
1499 tree type;
1501 if (base)
1503 type = TREE_TYPE (base);
1504 if (!TYPE_UNSIGNED (type))
1506 type = unsigned_type_for (type);
1507 base = fold_convert (type, base);
1508 if (step)
1509 step = fold_convert (type, step);
1513 for (i = 0; i < n_iv_cands (data); i++)
1515 cand = iv_cand (data, i);
1517 if (cand->pos != pos)
1518 continue;
1520 if (cand->incremented_at != incremented_at)
1521 continue;
1523 if (!cand->iv)
1525 if (!base && !step)
1526 break;
1528 continue;
1531 if (!base && !step)
1532 continue;
1534 if (!operand_equal_p (base, cand->iv->base, 0))
1535 continue;
1537 if (zero_p (cand->iv->step))
1539 if (zero_p (step))
1540 break;
1542 else
1544 if (step && operand_equal_p (step, cand->iv->step, 0))
1545 break;
1549 if (i == n_iv_cands (data))
1551 cand = xcalloc (1, sizeof (struct iv_cand));
1552 cand->id = i;
1554 if (!base && !step)
1555 cand->iv = NULL;
1556 else
1557 cand->iv = alloc_iv (base, step);
1559 cand->pos = pos;
1560 if (pos != IP_ORIGINAL && cand->iv)
1562 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1563 cand->var_after = cand->var_before;
1565 cand->important = important;
1566 cand->incremented_at = incremented_at;
1567 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1569 if (dump_file && (dump_flags & TDF_DETAILS))
1570 dump_cand (dump_file, cand);
1573 if (important && !cand->important)
1575 cand->important = true;
1576 if (dump_file && (dump_flags & TDF_DETAILS))
1577 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1580 if (use)
1582 bitmap_set_bit (use->related_cands, i);
1583 if (dump_file && (dump_flags & TDF_DETAILS))
1584 fprintf (dump_file, "Candidate %d is related to use %d\n",
1585 cand->id, use->id);
1588 return cand;
1591 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1592 position to POS. If USE is not NULL, the candidate is set as related to
1593 it. The candidate computation is scheduled on all available positions. */
1595 static void
1596 add_candidate (struct ivopts_data *data,
1597 tree base, tree step, bool important, struct iv_use *use)
1599 if (ip_normal_pos (data->current_loop))
1600 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
1601 if (ip_end_pos (data->current_loop))
1602 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
1605 /* Adds standard iv candidates. */
1607 static void
1608 add_standard_iv_candidates (struct ivopts_data *data)
1610 /* Add 0 + 1 * iteration candidate. */
1611 add_candidate (data,
1612 build_int_cst (unsigned_intSI_type_node, 0),
1613 build_int_cst (unsigned_intSI_type_node, 1),
1614 true, NULL);
1616 /* The same for a long type if it is still fast enough. */
1617 if (BITS_PER_WORD > 32)
1618 add_candidate (data,
1619 build_int_cst (unsigned_intDI_type_node, 0),
1620 build_int_cst (unsigned_intDI_type_node, 1),
1621 true, NULL);
1625 /* Adds candidates bases on the old induction variable IV. */
1627 static void
1628 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
1630 tree phi, def;
1631 struct iv_cand *cand;
1633 add_candidate (data, iv->base, iv->step, true, NULL);
1635 /* The same, but with initial value zero. */
1636 add_candidate (data,
1637 build_int_cst (TREE_TYPE (iv->base), 0),
1638 iv->step, true, NULL);
1640 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
1641 if (TREE_CODE (phi) == PHI_NODE)
1643 /* Additionally record the possibility of leaving the original iv
1644 untouched. */
1645 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
1646 cand = add_candidate_1 (data,
1647 iv->base, iv->step, true, IP_ORIGINAL, NULL,
1648 SSA_NAME_DEF_STMT (def));
1649 cand->var_before = iv->ssa_name;
1650 cand->var_after = def;
1654 /* Adds candidates based on the old induction variables. */
1656 static void
1657 add_old_ivs_candidates (struct ivopts_data *data)
1659 unsigned i;
1660 struct iv *iv;
1662 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
1664 iv = ver_info (data, i)->iv;
1665 if (iv && iv->biv_p && !zero_p (iv->step))
1666 add_old_iv_candidates (data, iv);
1670 /* Adds candidates based on the value of the induction variable IV and USE. */
1672 static void
1673 add_iv_value_candidates (struct ivopts_data *data,
1674 struct iv *iv, struct iv_use *use)
1676 add_candidate (data, iv->base, iv->step, false, use);
1678 /* The same, but with initial value zero. */
1679 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
1680 iv->step, false, use);
1683 /* Adds candidates based on the address IV and USE. */
1685 static void
1686 add_address_candidates (struct ivopts_data *data,
1687 struct iv *iv, struct iv_use *use)
1689 tree base, abase, tmp, *act;
1691 /* First, the trivial choices. */
1692 add_iv_value_candidates (data, iv, use);
1694 /* Second, try removing the COMPONENT_REFs. */
1695 if (TREE_CODE (iv->base) == ADDR_EXPR)
1697 base = TREE_OPERAND (iv->base, 0);
1698 while (TREE_CODE (base) == COMPONENT_REF
1699 || (TREE_CODE (base) == ARRAY_REF
1700 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
1701 base = TREE_OPERAND (base, 0);
1703 if (base != TREE_OPERAND (iv->base, 0))
1705 if (TREE_CODE (base) == INDIRECT_REF)
1706 base = TREE_OPERAND (base, 0);
1707 else
1708 base = build_addr (base);
1709 add_candidate (data, base, iv->step, false, use);
1713 /* Third, try removing the constant offset. */
1714 abase = iv->base;
1715 while (TREE_CODE (abase) == PLUS_EXPR
1716 && TREE_CODE (TREE_OPERAND (abase, 1)) != INTEGER_CST)
1717 abase = TREE_OPERAND (abase, 0);
1718 /* We found the offset, so make the copy of the non-shared part and
1719 remove it. */
1720 if (TREE_CODE (abase) == PLUS_EXPR)
1722 tmp = iv->base;
1723 act = &base;
1725 for (tmp = iv->base; tmp != abase; tmp = TREE_OPERAND (tmp, 0))
1727 *act = build2 (PLUS_EXPR, TREE_TYPE (tmp),
1728 NULL_TREE, TREE_OPERAND (tmp, 1));
1729 act = &TREE_OPERAND (*act, 0);
1731 *act = TREE_OPERAND (tmp, 0);
1733 add_candidate (data, base, iv->step, false, use);
1737 /* Possibly adds pseudocandidate for replacing the final value of USE by
1738 a direct computation. */
1740 static void
1741 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
1743 struct tree_niter_desc *niter;
1744 struct loop *loop = data->current_loop;
1746 /* We must know where we exit the loop and how many times does it roll. */
1747 if (!single_dom_exit (loop))
1748 return;
1750 niter = &loop_data (loop)->niter;
1751 if (!niter->niter
1752 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
1753 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
1754 return;
1756 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
1759 /* Adds candidates based on the uses. */
1761 static void
1762 add_derived_ivs_candidates (struct ivopts_data *data)
1764 unsigned i;
1766 for (i = 0; i < n_iv_uses (data); i++)
1768 struct iv_use *use = iv_use (data, i);
1770 if (!use)
1771 continue;
1773 switch (use->type)
1775 case USE_NONLINEAR_EXPR:
1776 case USE_COMPARE:
1777 /* Just add the ivs based on the value of the iv used here. */
1778 add_iv_value_candidates (data, use->iv, use);
1779 break;
1781 case USE_OUTER:
1782 add_iv_value_candidates (data, use->iv, use);
1784 /* Additionally, add the pseudocandidate for the possibility to
1785 replace the final value by a direct computation. */
1786 add_iv_outer_candidates (data, use);
1787 break;
1789 case USE_ADDRESS:
1790 add_address_candidates (data, use->iv, use);
1791 break;
1793 default:
1794 gcc_unreachable ();
1799 /* Finds the candidates for the induction variables. */
1801 static void
1802 find_iv_candidates (struct ivopts_data *data)
1804 /* Add commonly used ivs. */
1805 add_standard_iv_candidates (data);
1807 /* Add old induction variables. */
1808 add_old_ivs_candidates (data);
1810 /* Add induction variables derived from uses. */
1811 add_derived_ivs_candidates (data);
1814 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
1815 If consider_all_candidates is true, we use a two-dimensional array, otherwise
1816 we allocate a simple list to every use. */
1818 static void
1819 alloc_use_cost_map (struct ivopts_data *data)
1821 unsigned i, n_imp = 0, size, j;
1823 if (!data->consider_all_candidates)
1825 for (i = 0; i < n_iv_cands (data); i++)
1827 struct iv_cand *cand = iv_cand (data, i);
1828 if (cand->important)
1829 n_imp++;
1833 for (i = 0; i < n_iv_uses (data); i++)
1835 struct iv_use *use = iv_use (data, i);
1837 if (data->consider_all_candidates)
1839 size = n_iv_cands (data);
1840 use->n_map_members = size;
1842 else
1844 size = n_imp;
1845 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, size++);
1846 use->n_map_members = 0;
1849 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
1853 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
1854 on invariants DEPENDS_ON. */
1856 static void
1857 set_use_iv_cost (struct ivopts_data *data,
1858 struct iv_use *use, struct iv_cand *cand, unsigned cost,
1859 bitmap depends_on)
1861 if (cost == INFTY
1862 && depends_on)
1864 BITMAP_XFREE (depends_on);
1865 depends_on = NULL;
1868 if (data->consider_all_candidates)
1870 use->cost_map[cand->id].cand = cand;
1871 use->cost_map[cand->id].cost = cost;
1872 use->cost_map[cand->id].depends_on = depends_on;
1873 return;
1876 if (cost == INFTY)
1877 return;
1879 use->cost_map[use->n_map_members].cand = cand;
1880 use->cost_map[use->n_map_members].cost = cost;
1881 use->cost_map[use->n_map_members].depends_on = depends_on;
1882 use->n_map_members++;
1885 /* Gets cost of (USE, CANDIDATE) pair. Stores the bitmap of dependencies to
1886 DEPENDS_ON. */
1888 static unsigned
1889 get_use_iv_cost (struct ivopts_data *data,
1890 struct iv_use *use, struct iv_cand *cand, bitmap *depends_on)
1892 unsigned i;
1894 if (!cand)
1895 return INFTY;
1897 if (data->consider_all_candidates)
1898 i = cand->id;
1899 else
1901 for (i = 0; i < use->n_map_members; i++)
1902 if (use->cost_map[i].cand == cand)
1903 break;
1905 if (i == use->n_map_members)
1906 return INFTY;
1909 if (depends_on)
1910 *depends_on = use->cost_map[i].depends_on;
1911 return use->cost_map[i].cost;
1914 /* Returns estimate on cost of computing SEQ. */
1916 static unsigned
1917 seq_cost (rtx seq)
1919 unsigned cost = 0;
1920 rtx set;
1922 for (; seq; seq = NEXT_INSN (seq))
1924 set = single_set (seq);
1925 if (set)
1926 cost += rtx_cost (set, SET);
1927 else
1928 cost++;
1931 return cost;
1934 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
1935 static rtx
1936 produce_memory_decl_rtl (tree obj, int *regno)
1938 rtx x;
1939 if (!obj)
1940 abort ();
1941 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
1943 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
1944 x = gen_rtx_SYMBOL_REF (Pmode, name);
1946 else
1947 x = gen_raw_REG (Pmode, (*regno)++);
1949 return gen_rtx_MEM (DECL_MODE (obj), x);
1952 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
1953 walk_tree. DATA contains the actual fake register number. */
1955 static tree
1956 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
1958 tree obj = NULL_TREE;
1959 rtx x = NULL_RTX;
1960 int *regno = data;
1962 switch (TREE_CODE (*expr_p))
1964 case ADDR_EXPR:
1965 for (expr_p = &TREE_OPERAND (*expr_p, 0);
1966 (handled_component_p (*expr_p)
1967 || TREE_CODE (*expr_p) == REALPART_EXPR
1968 || TREE_CODE (*expr_p) == IMAGPART_EXPR);
1969 expr_p = &TREE_OPERAND (*expr_p, 0));
1970 obj = *expr_p;
1971 if (DECL_P (obj))
1972 x = produce_memory_decl_rtl (obj, regno);
1973 break;
1975 case SSA_NAME:
1976 *ws = 0;
1977 obj = SSA_NAME_VAR (*expr_p);
1978 if (!DECL_RTL_SET_P (obj))
1979 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
1980 break;
1982 case VAR_DECL:
1983 case PARM_DECL:
1984 case RESULT_DECL:
1985 *ws = 0;
1986 obj = *expr_p;
1988 if (DECL_RTL_SET_P (obj))
1989 break;
1991 if (DECL_MODE (obj) == BLKmode)
1992 x = produce_memory_decl_rtl (obj, regno);
1993 else
1994 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
1996 break;
1998 default:
1999 break;
2002 if (x)
2004 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
2005 SET_DECL_RTL (obj, x);
2008 return NULL_TREE;
2011 /* Determines cost of the computation of EXPR. */
2013 static unsigned
2014 computation_cost (tree expr)
2016 rtx seq, rslt;
2017 tree type = TREE_TYPE (expr);
2018 unsigned cost;
2019 int regno = 0;
2021 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2022 start_sequence ();
2023 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2024 seq = get_insns ();
2025 end_sequence ();
2027 cost = seq_cost (seq);
2028 if (GET_CODE (rslt) == MEM)
2029 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2031 return cost;
2034 /* Returns variable containing the value of candidate CAND at statement AT. */
2036 static tree
2037 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2039 if (stmt_after_increment (loop, cand, stmt))
2040 return cand->var_after;
2041 else
2042 return cand->var_before;
2045 /* Determines the expression by that USE is expressed from induction variable
2046 CAND at statement AT in LOOP. */
2048 static tree
2049 get_computation_at (struct loop *loop,
2050 struct iv_use *use, struct iv_cand *cand, tree at)
2052 tree ubase = use->iv->base;
2053 tree ustep = use->iv->step;
2054 tree cbase = cand->iv->base;
2055 tree cstep = cand->iv->step;
2056 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2057 tree uutype;
2058 tree expr, delta;
2059 tree ratio;
2060 unsigned HOST_WIDE_INT ustepi, cstepi;
2061 HOST_WIDE_INT ratioi;
2063 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2065 /* We do not have a precision to express the values of use. */
2066 return NULL_TREE;
2069 expr = var_at_stmt (loop, cand, at);
2071 if (TREE_TYPE (expr) != ctype)
2073 /* This may happen with the original ivs. */
2074 expr = fold_convert (ctype, expr);
2077 if (TYPE_UNSIGNED (utype))
2078 uutype = utype;
2079 else
2081 uutype = unsigned_type_for (utype);
2082 ubase = fold_convert (uutype, ubase);
2083 ustep = fold_convert (uutype, ustep);
2086 if (uutype != ctype)
2088 expr = fold_convert (uutype, expr);
2089 cbase = fold_convert (uutype, cbase);
2090 cstep = fold_convert (uutype, cstep);
2093 if (!cst_and_fits_in_hwi (cstep)
2094 || !cst_and_fits_in_hwi (ustep))
2095 return NULL_TREE;
2097 ustepi = int_cst_value (ustep);
2098 cstepi = int_cst_value (cstep);
2100 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2102 /* TODO maybe consider case when ustep divides cstep and the ratio is
2103 a power of 2 (so that the division is fast to execute)? We would
2104 need to be much more careful with overflows etc. then. */
2105 return NULL_TREE;
2108 /* We may need to shift the value if we are after the increment. */
2109 if (stmt_after_increment (loop, cand, at))
2110 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2112 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2113 or |ratio| == 1, it is better to handle this like
2115 ubase - ratio * cbase + ratio * var. */
2117 if (ratioi == 1)
2119 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2120 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2122 else if (ratioi == -1)
2124 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2125 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2127 else if (TREE_CODE (cbase) == INTEGER_CST)
2129 ratio = build_int_cst_type (uutype, ratioi);
2130 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2131 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2132 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2133 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2135 else
2137 expr = fold (build2 (MINUS_EXPR, uutype, expr, cbase));
2138 ratio = build_int_cst_type (uutype, ratioi);
2139 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2140 expr = fold (build2 (PLUS_EXPR, uutype, ubase, expr));
2143 return fold_convert (utype, expr);
2146 /* Determines the expression by that USE is expressed from induction variable
2147 CAND in LOOP. */
2149 static tree
2150 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2152 return get_computation_at (loop, use, cand, use->stmt);
2155 /* Strips constant offsets from EXPR and adds them to OFFSET. */
2157 static void
2158 strip_offset (tree *expr, unsigned HOST_WIDE_INT *offset)
2160 tree op0, op1;
2161 enum tree_code code;
2163 while (1)
2165 if (cst_and_fits_in_hwi (*expr))
2167 *offset += int_cst_value (*expr);
2168 *expr = integer_zero_node;
2169 return;
2172 code = TREE_CODE (*expr);
2174 if (code != PLUS_EXPR && code != MINUS_EXPR)
2175 return;
2177 op0 = TREE_OPERAND (*expr, 0);
2178 op1 = TREE_OPERAND (*expr, 1);
2180 if (cst_and_fits_in_hwi (op1))
2182 if (code == PLUS_EXPR)
2183 *offset += int_cst_value (op1);
2184 else
2185 *offset -= int_cst_value (op1);
2187 *expr = op0;
2188 continue;
2191 if (code != PLUS_EXPR)
2192 return;
2194 if (!cst_and_fits_in_hwi (op0))
2195 return;
2197 *offset += int_cst_value (op0);
2198 *expr = op1;
2202 /* Returns cost of addition in MODE. */
2204 static unsigned
2205 add_cost (enum machine_mode mode)
2207 static unsigned costs[NUM_MACHINE_MODES];
2208 rtx seq;
2209 unsigned cost;
2211 if (costs[mode])
2212 return costs[mode];
2214 start_sequence ();
2215 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2216 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2217 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2218 NULL_RTX);
2219 seq = get_insns ();
2220 end_sequence ();
2222 cost = seq_cost (seq);
2223 if (!cost)
2224 cost = 1;
2226 costs[mode] = cost;
2228 if (dump_file && (dump_flags & TDF_DETAILS))
2229 fprintf (dump_file, "Addition in %s costs %d\n",
2230 GET_MODE_NAME (mode), cost);
2231 return cost;
2234 /* Entry in a hashtable of already known costs for multiplication. */
2235 struct mbc_entry
2237 HOST_WIDE_INT cst; /* The constant to multiply by. */
2238 enum machine_mode mode; /* In mode. */
2239 unsigned cost; /* The cost. */
2242 /* Counts hash value for the ENTRY. */
2244 static hashval_t
2245 mbc_entry_hash (const void *entry)
2247 const struct mbc_entry *e = entry;
2249 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2252 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2254 static int
2255 mbc_entry_eq (const void *entry1, const void *entry2)
2257 const struct mbc_entry *e1 = entry1;
2258 const struct mbc_entry *e2 = entry2;
2260 return (e1->mode == e2->mode
2261 && e1->cst == e2->cst);
2264 /* Returns cost of multiplication by constant CST in MODE. */
2266 static unsigned
2267 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2269 static htab_t costs;
2270 struct mbc_entry **cached, act;
2271 rtx seq;
2272 unsigned cost;
2274 if (!costs)
2275 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2277 act.mode = mode;
2278 act.cst = cst;
2279 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2280 if (*cached)
2281 return (*cached)->cost;
2283 *cached = xmalloc (sizeof (struct mbc_entry));
2284 (*cached)->mode = mode;
2285 (*cached)->cst = cst;
2287 start_sequence ();
2288 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2289 NULL_RTX, 0);
2290 seq = get_insns ();
2291 end_sequence ();
2293 cost = seq_cost (seq);
2295 if (dump_file && (dump_flags & TDF_DETAILS))
2296 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2297 (int) cst, GET_MODE_NAME (mode), cost);
2299 (*cached)->cost = cost;
2301 return cost;
2304 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2305 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2306 variable is omitted. The created memory accesses MODE.
2308 TODO -- there must be some better way. This all is quite crude. */
2310 static unsigned
2311 get_address_cost (bool symbol_present, bool var_present,
2312 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2314 #define MAX_RATIO 128
2315 static sbitmap valid_mult;
2316 static HOST_WIDE_INT rat, off;
2317 static HOST_WIDE_INT min_offset, max_offset;
2318 static unsigned costs[2][2][2][2];
2319 unsigned cost, acost;
2320 rtx seq, addr, base;
2321 bool offset_p, ratio_p;
2322 rtx reg1;
2323 HOST_WIDE_INT s_offset;
2324 unsigned HOST_WIDE_INT mask;
2325 unsigned bits;
2327 if (!valid_mult)
2329 HOST_WIDE_INT i;
2331 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2333 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2334 for (i = 1; i <= 1 << 20; i <<= 1)
2336 XEXP (addr, 1) = GEN_INT (i);
2337 if (!memory_address_p (Pmode, addr))
2338 break;
2340 max_offset = i >> 1;
2341 off = max_offset;
2343 for (i = 1; i <= 1 << 20; i <<= 1)
2345 XEXP (addr, 1) = GEN_INT (-i);
2346 if (!memory_address_p (Pmode, addr))
2347 break;
2349 min_offset = -(i >> 1);
2351 if (dump_file && (dump_flags & TDF_DETAILS))
2353 fprintf (dump_file, "get_address_cost:\n");
2354 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2355 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2358 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2359 sbitmap_zero (valid_mult);
2360 rat = 1;
2361 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2362 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2364 XEXP (addr, 1) = GEN_INT (i);
2365 if (memory_address_p (Pmode, addr))
2367 SET_BIT (valid_mult, i + MAX_RATIO);
2368 rat = i;
2372 if (dump_file && (dump_flags & TDF_DETAILS))
2374 fprintf (dump_file, " allowed multipliers:");
2375 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2376 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2377 fprintf (dump_file, " %d", (int) i);
2378 fprintf (dump_file, "\n");
2379 fprintf (dump_file, "\n");
2383 bits = GET_MODE_BITSIZE (Pmode);
2384 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2385 offset &= mask;
2386 if ((offset >> (bits - 1) & 1))
2387 offset |= ~mask;
2388 s_offset = offset;
2390 cost = 0;
2391 offset_p = (min_offset <= s_offset && s_offset <= max_offset);
2392 ratio_p = (ratio != 1
2393 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2394 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2396 if (ratio != 1 && !ratio_p)
2397 cost += multiply_by_cost (ratio, Pmode);
2399 if (s_offset && !offset_p && !symbol_present)
2401 cost += add_cost (Pmode);
2402 var_present = true;
2405 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2406 if (!acost)
2408 acost = 0;
2410 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2411 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2412 if (ratio_p)
2413 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2415 if (symbol_present)
2417 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2418 if (offset_p)
2419 base = gen_rtx_fmt_e (CONST, Pmode,
2420 gen_rtx_fmt_ee (PLUS, Pmode,
2421 base,
2422 GEN_INT (off)));
2423 if (var_present)
2424 base = gen_rtx_fmt_ee (PLUS, Pmode, reg1, base);
2427 else if (var_present)
2429 base = reg1;
2430 if (offset_p)
2431 base = gen_rtx_fmt_ee (PLUS, Pmode, base, GEN_INT (off));
2433 else if (offset_p)
2434 base = GEN_INT (off);
2435 else
2436 base = NULL_RTX;
2438 if (base)
2439 addr = gen_rtx_fmt_ee (PLUS, Pmode, base, addr);
2441 start_sequence ();
2442 addr = memory_address (Pmode, addr);
2443 seq = get_insns ();
2444 end_sequence ();
2446 acost = seq_cost (seq);
2447 acost += address_cost (addr, Pmode);
2449 if (!acost)
2450 acost = 1;
2451 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2454 return cost + acost;
2457 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2458 the bitmap to that we should store it. */
2460 static struct ivopts_data *fd_ivopts_data;
2461 static tree
2462 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2464 bitmap *depends_on = data;
2465 struct version_info *info;
2467 if (TREE_CODE (*expr_p) != SSA_NAME)
2468 return NULL_TREE;
2469 info = name_info (fd_ivopts_data, *expr_p);
2471 if (!info->inv_id || info->has_nonlin_use)
2472 return NULL_TREE;
2474 if (!*depends_on)
2475 *depends_on = BITMAP_XMALLOC ();
2476 bitmap_set_bit (*depends_on, info->inv_id);
2478 return NULL_TREE;
2481 /* Estimates cost of forcing EXPR into variable. DEPENDS_ON is a set of the
2482 invariants the computation depends on. */
2484 static unsigned
2485 force_var_cost (struct ivopts_data *data,
2486 tree expr, bitmap *depends_on)
2488 static bool costs_initialized = false;
2489 static unsigned integer_cost;
2490 static unsigned symbol_cost;
2491 static unsigned address_cost;
2493 if (!costs_initialized)
2495 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2496 rtx x = gen_rtx_MEM (DECL_MODE (var),
2497 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2498 tree addr;
2499 tree type = build_pointer_type (integer_type_node);
2501 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2502 2000));
2504 SET_DECL_RTL (var, x);
2505 TREE_STATIC (var) = 1;
2506 addr = build1 (ADDR_EXPR, type, var);
2507 symbol_cost = computation_cost (addr) + 1;
2509 address_cost
2510 = computation_cost (build2 (PLUS_EXPR, type,
2511 addr,
2512 build_int_cst_type (type, 2000))) + 1;
2513 if (dump_file && (dump_flags & TDF_DETAILS))
2515 fprintf (dump_file, "force_var_cost:\n");
2516 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2517 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2518 fprintf (dump_file, " address %d\n", (int) address_cost);
2519 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2520 fprintf (dump_file, "\n");
2523 costs_initialized = true;
2526 if (depends_on)
2528 fd_ivopts_data = data;
2529 walk_tree (&expr, find_depends, depends_on, NULL);
2532 if (SSA_VAR_P (expr))
2533 return 0;
2535 if (TREE_INVARIANT (expr))
2537 if (TREE_CODE (expr) == INTEGER_CST)
2538 return integer_cost;
2540 if (TREE_CODE (expr) == ADDR_EXPR)
2542 tree obj = TREE_OPERAND (expr, 0);
2544 if (TREE_CODE (obj) == VAR_DECL
2545 || TREE_CODE (obj) == PARM_DECL
2546 || TREE_CODE (obj) == RESULT_DECL)
2547 return symbol_cost;
2550 return address_cost;
2553 /* Just an arbitrary value, FIXME. */
2554 return target_spill_cost;
2557 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2558 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2559 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2560 invariants the computation depends on. */
2562 static unsigned
2563 split_address_cost (struct ivopts_data *data,
2564 tree addr, bool *symbol_present, bool *var_present,
2565 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2567 tree core;
2568 HOST_WIDE_INT bitsize;
2569 HOST_WIDE_INT bitpos;
2570 tree toffset;
2571 enum machine_mode mode;
2572 int unsignedp, volatilep;
2574 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
2575 &unsignedp, &volatilep);
2577 if (toffset != 0
2578 || bitpos % BITS_PER_UNIT != 0
2579 || TREE_CODE (core) != VAR_DECL)
2581 *symbol_present = false;
2582 *var_present = true;
2583 fd_ivopts_data = data;
2584 walk_tree (&addr, find_depends, depends_on, NULL);
2585 return target_spill_cost;
2588 *offset += bitpos / BITS_PER_UNIT;
2589 if (TREE_STATIC (core)
2590 || DECL_EXTERNAL (core))
2592 *symbol_present = true;
2593 *var_present = false;
2594 return 0;
2597 *symbol_present = false;
2598 *var_present = true;
2599 return 0;
2602 /* Estimates cost of expressing difference of addresses E1 - E2 as
2603 var + symbol + offset. The value of offset is added to OFFSET,
2604 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2605 part is missing. DEPENDS_ON is a set of the invariants the computation
2606 depends on. */
2608 static unsigned
2609 ptr_difference_cost (struct ivopts_data *data,
2610 tree e1, tree e2, bool *symbol_present, bool *var_present,
2611 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2613 HOST_WIDE_INT diff = 0;
2614 unsigned cost;
2616 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
2618 if (TREE_CODE (e2) == ADDR_EXPR
2619 && ptr_difference_const (TREE_OPERAND (e1, 0),
2620 TREE_OPERAND (e2, 0), &diff))
2622 *offset += diff;
2623 *symbol_present = false;
2624 *var_present = false;
2625 return 0;
2628 if (e2 == integer_zero_node)
2629 return split_address_cost (data, TREE_OPERAND (e1, 0),
2630 symbol_present, var_present, offset, depends_on);
2632 *symbol_present = false;
2633 *var_present = true;
2635 cost = force_var_cost (data, e1, depends_on);
2636 cost += force_var_cost (data, e2, depends_on);
2637 cost += add_cost (Pmode);
2639 return cost;
2642 /* Estimates cost of expressing difference E1 - E2 as
2643 var + symbol + offset. The value of offset is added to OFFSET,
2644 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2645 part is missing. DEPENDS_ON is a set of the invariants the computation
2646 depends on. */
2648 static unsigned
2649 difference_cost (struct ivopts_data *data,
2650 tree e1, tree e2, bool *symbol_present, bool *var_present,
2651 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2653 unsigned cost;
2654 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
2656 strip_offset (&e1, offset);
2657 *offset = -*offset;
2658 strip_offset (&e2, offset);
2659 *offset = -*offset;
2661 if (TREE_CODE (e1) == ADDR_EXPR)
2662 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
2663 depends_on);
2664 *symbol_present = false;
2666 if (operand_equal_p (e1, e2, 0))
2668 *var_present = false;
2669 return 0;
2671 *var_present = true;
2672 if (zero_p (e2))
2673 return force_var_cost (data, e1, depends_on);
2675 if (zero_p (e1))
2677 cost = force_var_cost (data, e2, depends_on);
2678 cost += multiply_by_cost (-1, mode);
2680 return cost;
2683 cost = force_var_cost (data, e1, depends_on);
2684 cost += force_var_cost (data, e2, depends_on);
2685 cost += add_cost (mode);
2687 return cost;
2690 /* Determines the cost of the computation by that USE is expressed
2691 from induction variable CAND. If ADDRESS_P is true, we just need
2692 to create an address from it, otherwise we want to get it into
2693 register. A set of invariants we depend on is stored in
2694 DEPENDS_ON. AT is the statement at that the value is computed. */
2696 static unsigned
2697 get_computation_cost_at (struct ivopts_data *data,
2698 struct iv_use *use, struct iv_cand *cand,
2699 bool address_p, bitmap *depends_on, tree at)
2701 tree ubase = use->iv->base, ustep = use->iv->step;
2702 tree cbase, cstep;
2703 tree utype = TREE_TYPE (ubase), ctype;
2704 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
2705 HOST_WIDE_INT ratio, aratio;
2706 bool var_present, symbol_present;
2707 unsigned cost = 0, n_sums;
2709 *depends_on = NULL;
2711 /* Only consider real candidates. */
2712 if (!cand->iv)
2713 return INFTY;
2715 cbase = cand->iv->base;
2716 cstep = cand->iv->step;
2717 ctype = TREE_TYPE (cbase);
2719 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2721 /* We do not have a precision to express the values of use. */
2722 return INFTY;
2725 if (!cst_and_fits_in_hwi (ustep)
2726 || !cst_and_fits_in_hwi (cstep))
2727 return INFTY;
2729 if (TREE_CODE (ubase) == INTEGER_CST
2730 && !cst_and_fits_in_hwi (ubase))
2731 goto fallback;
2733 if (TREE_CODE (cbase) == INTEGER_CST
2734 && !cst_and_fits_in_hwi (cbase))
2735 goto fallback;
2737 ustepi = int_cst_value (ustep);
2738 cstepi = int_cst_value (cstep);
2740 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
2742 /* TODO -- add direct handling of this case. */
2743 goto fallback;
2746 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
2747 return INFTY;
2749 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2750 or ratio == 1, it is better to handle this like
2752 ubase - ratio * cbase + ratio * var
2754 (also holds in the case ratio == -1, TODO. */
2756 if (TREE_CODE (cbase) == INTEGER_CST)
2758 offset = - ratio * int_cst_value (cbase);
2759 cost += difference_cost (data,
2760 ubase, integer_zero_node,
2761 &symbol_present, &var_present, &offset,
2762 depends_on);
2764 else if (ratio == 1)
2766 cost += difference_cost (data,
2767 ubase, cbase,
2768 &symbol_present, &var_present, &offset,
2769 depends_on);
2771 else
2773 cost += force_var_cost (data, cbase, depends_on);
2774 cost += add_cost (TYPE_MODE (ctype));
2775 cost += difference_cost (data,
2776 ubase, integer_zero_node,
2777 &symbol_present, &var_present, &offset,
2778 depends_on);
2781 /* If we are after the increment, the value of the candidate is higher by
2782 one iteration. */
2783 if (stmt_after_increment (data->current_loop, cand, at))
2784 offset -= ratio * cstepi;
2786 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
2787 (symbol/var/const parts may be omitted). If we are looking for an address,
2788 find the cost of addressing this. */
2789 if (address_p)
2790 return get_address_cost (symbol_present, var_present, offset, ratio);
2792 /* Otherwise estimate the costs for computing the expression. */
2793 aratio = ratio > 0 ? ratio : -ratio;
2794 if (!symbol_present && !var_present && !offset)
2796 if (ratio != 1)
2797 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
2799 return cost;
2802 if (aratio != 1)
2803 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
2805 n_sums = 1;
2806 if (var_present
2807 /* Symbol + offset should be compile-time computable. */
2808 && (symbol_present || offset))
2809 n_sums++;
2811 return cost + n_sums * add_cost (TYPE_MODE (ctype));
2813 fallback:
2815 /* Just get the expression, expand it and measure the cost. */
2816 tree comp = get_computation_at (data->current_loop, use, cand, at);
2818 if (!comp)
2819 return INFTY;
2821 if (address_p)
2822 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
2824 return computation_cost (comp);
2828 /* Determines the cost of the computation by that USE is expressed
2829 from induction variable CAND. If ADDRESS_P is true, we just need
2830 to create an address from it, otherwise we want to get it into
2831 register. A set of invariants we depend on is stored in
2832 DEPENDS_ON. */
2834 static unsigned
2835 get_computation_cost (struct ivopts_data *data,
2836 struct iv_use *use, struct iv_cand *cand,
2837 bool address_p, bitmap *depends_on)
2839 return get_computation_cost_at (data,
2840 use, cand, address_p, depends_on, use->stmt);
2843 /* Determines cost of basing replacement of USE on CAND in a generic
2844 expression. */
2846 static void
2847 determine_use_iv_cost_generic (struct ivopts_data *data,
2848 struct iv_use *use, struct iv_cand *cand)
2850 bitmap depends_on;
2851 unsigned cost = get_computation_cost (data, use, cand, false, &depends_on);
2853 set_use_iv_cost (data, use, cand, cost, depends_on);
2856 /* Determines cost of basing replacement of USE on CAND in an address. */
2858 static void
2859 determine_use_iv_cost_address (struct ivopts_data *data,
2860 struct iv_use *use, struct iv_cand *cand)
2862 bitmap depends_on;
2863 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
2865 set_use_iv_cost (data, use, cand, cost, depends_on);
2868 /* Computes value of induction variable IV in iteration NITER. */
2870 static tree
2871 iv_value (struct iv *iv, tree niter)
2873 tree val;
2874 tree type = TREE_TYPE (iv->base);
2876 niter = fold_convert (type, niter);
2877 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
2879 return fold (build2 (PLUS_EXPR, type, iv->base, val));
2882 /* Computes value of candidate CAND at position AT in iteration NITER. */
2884 static tree
2885 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
2887 tree val = iv_value (cand->iv, niter);
2888 tree type = TREE_TYPE (cand->iv->base);
2890 if (stmt_after_increment (loop, cand, at))
2891 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
2893 return val;
2896 /* Check whether it is possible to express the condition in USE by comparison
2897 of candidate CAND. If so, store the comparison code to COMPARE and the
2898 value compared with to BOUND. */
2900 static bool
2901 may_eliminate_iv (struct loop *loop,
2902 struct iv_use *use, struct iv_cand *cand,
2903 enum tree_code *compare, tree *bound)
2905 edge exit;
2906 struct tree_niter_desc *niter, new_niter;
2907 tree wider_type, type, base;
2909 /* For now just very primitive -- we work just for the single exit condition,
2910 and are quite conservative about the possible overflows. TODO -- both of
2911 these can be improved. */
2912 exit = single_dom_exit (loop);
2913 if (!exit)
2914 return false;
2915 if (use->stmt != last_stmt (exit->src))
2916 return false;
2918 niter = &loop_data (loop)->niter;
2919 if (!niter->niter
2920 || !integer_nonzerop (niter->assumptions)
2921 || !integer_zerop (niter->may_be_zero))
2922 return false;
2924 if (exit->flags & EDGE_TRUE_VALUE)
2925 *compare = EQ_EXPR;
2926 else
2927 *compare = NE_EXPR;
2929 *bound = cand_value_at (loop, cand, use->stmt, niter->niter);
2931 /* Let us check there is not some problem with overflows, by checking that
2932 the number of iterations is unchanged. */
2933 base = cand->iv->base;
2934 type = TREE_TYPE (base);
2935 if (stmt_after_increment (loop, cand, use->stmt))
2936 base = fold (build2 (PLUS_EXPR, type, base, cand->iv->step));
2938 new_niter.niter = NULL_TREE;
2939 number_of_iterations_cond (TREE_TYPE (cand->iv->base), base,
2940 cand->iv->step, NE_EXPR, *bound, NULL_TREE,
2941 &new_niter);
2942 if (!new_niter.niter
2943 || !integer_nonzerop (new_niter.assumptions)
2944 || !integer_zerop (new_niter.may_be_zero))
2945 return false;
2947 wider_type = TREE_TYPE (new_niter.niter);
2948 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (TREE_TYPE (niter->niter)))
2949 wider_type = TREE_TYPE (niter->niter);
2950 if (!operand_equal_p (fold_convert (wider_type, niter->niter),
2951 fold_convert (wider_type, new_niter.niter), 0))
2952 return false;
2954 return true;
2957 /* Determines cost of basing replacement of USE on CAND in a condition. */
2959 static void
2960 determine_use_iv_cost_condition (struct ivopts_data *data,
2961 struct iv_use *use, struct iv_cand *cand)
2963 tree bound;
2964 enum tree_code compare;
2966 /* Only consider real candidates. */
2967 if (!cand->iv)
2969 set_use_iv_cost (data, use, cand, INFTY, NULL);
2970 return;
2973 if (may_eliminate_iv (data->current_loop, use, cand, &compare, &bound))
2975 bitmap depends_on = NULL;
2976 unsigned cost = force_var_cost (data, bound, &depends_on);
2978 set_use_iv_cost (data, use, cand, cost, depends_on);
2979 return;
2982 /* The induction variable elimination failed; just express the original
2983 giv. If it is compared with an invariant, note that we cannot get
2984 rid of it. */
2985 if (TREE_CODE (*use->op_p) == SSA_NAME)
2986 record_invariant (data, *use->op_p, true);
2987 else
2989 record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
2990 record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
2993 determine_use_iv_cost_generic (data, use, cand);
2996 /* Checks whether it is possible to replace the final value of USE by
2997 a direct computation. If so, the formula is stored to *VALUE. */
2999 static bool
3000 may_replace_final_value (struct loop *loop, struct iv_use *use, tree *value)
3002 edge exit;
3003 struct tree_niter_desc *niter;
3005 exit = single_dom_exit (loop);
3006 if (!exit)
3007 return false;
3009 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3010 bb_for_stmt (use->stmt)));
3012 niter = &loop_data (loop)->niter;
3013 if (!niter->niter
3014 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
3015 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
3016 return false;
3018 *value = iv_value (use->iv, niter->niter);
3020 return true;
3023 /* Determines cost of replacing final value of USE using CAND. */
3025 static void
3026 determine_use_iv_cost_outer (struct ivopts_data *data,
3027 struct iv_use *use, struct iv_cand *cand)
3029 bitmap depends_on;
3030 unsigned cost;
3031 edge exit;
3032 tree value;
3033 struct loop *loop = data->current_loop;
3035 if (!cand->iv)
3037 if (!may_replace_final_value (loop, use, &value))
3039 set_use_iv_cost (data, use, cand, INFTY, NULL);
3040 return;
3043 depends_on = NULL;
3044 cost = force_var_cost (data, value, &depends_on);
3046 cost /= AVG_LOOP_NITER (loop);
3048 set_use_iv_cost (data, use, cand, cost, depends_on);
3049 return;
3052 exit = single_dom_exit (loop);
3053 if (exit)
3055 /* If there is just a single exit, we may use value of the candidate
3056 after we take it to determine the value of use. */
3057 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3058 last_stmt (exit->src));
3059 if (cost != INFTY)
3060 cost /= AVG_LOOP_NITER (loop);
3062 else
3064 /* Otherwise we just need to compute the iv. */
3065 cost = get_computation_cost (data, use, cand, false, &depends_on);
3068 set_use_iv_cost (data, use, cand, cost, depends_on);
3071 /* Determines cost of basing replacement of USE on CAND. */
3073 static void
3074 determine_use_iv_cost (struct ivopts_data *data,
3075 struct iv_use *use, struct iv_cand *cand)
3077 switch (use->type)
3079 case USE_NONLINEAR_EXPR:
3080 determine_use_iv_cost_generic (data, use, cand);
3081 break;
3083 case USE_OUTER:
3084 determine_use_iv_cost_outer (data, use, cand);
3085 break;
3087 case USE_ADDRESS:
3088 determine_use_iv_cost_address (data, use, cand);
3089 break;
3091 case USE_COMPARE:
3092 determine_use_iv_cost_condition (data, use, cand);
3093 break;
3095 default:
3096 gcc_unreachable ();
3100 /* Determines costs of basing the use of the iv on an iv candidate. */
3102 static void
3103 determine_use_iv_costs (struct ivopts_data *data)
3105 unsigned i, j;
3106 struct iv_use *use;
3107 struct iv_cand *cand;
3109 data->consider_all_candidates = (n_iv_cands (data)
3110 <= CONSIDER_ALL_CANDIDATES_BOUND);
3112 alloc_use_cost_map (data);
3114 if (!data->consider_all_candidates)
3116 /* Add the important candidate entries. */
3117 for (j = 0; j < n_iv_cands (data); j++)
3119 cand = iv_cand (data, j);
3120 if (!cand->important)
3121 continue;
3122 for (i = 0; i < n_iv_uses (data); i++)
3124 use = iv_use (data, i);
3125 determine_use_iv_cost (data, use, cand);
3130 for (i = 0; i < n_iv_uses (data); i++)
3132 use = iv_use (data, i);
3134 if (data->consider_all_candidates)
3136 for (j = 0; j < n_iv_cands (data); j++)
3138 cand = iv_cand (data, j);
3139 determine_use_iv_cost (data, use, cand);
3142 else
3144 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j,
3146 cand = iv_cand (data, j);
3147 if (!cand->important)
3148 determine_use_iv_cost (data, use, cand);
3153 if (dump_file && (dump_flags & TDF_DETAILS))
3155 fprintf (dump_file, "Use-candidate costs:\n");
3157 for (i = 0; i < n_iv_uses (data); i++)
3159 use = iv_use (data, i);
3161 fprintf (dump_file, "Use %d:\n", i);
3162 fprintf (dump_file, " cand\tcost\tdepends on\n");
3163 for (j = 0; j < use->n_map_members; j++)
3165 if (!use->cost_map[j].cand
3166 || use->cost_map[j].cost == INFTY)
3167 continue;
3169 fprintf (dump_file, " %d\t%d\t",
3170 use->cost_map[j].cand->id,
3171 use->cost_map[j].cost);
3172 if (use->cost_map[j].depends_on)
3173 bitmap_print (dump_file,
3174 use->cost_map[j].depends_on, "","");
3175 fprintf (dump_file, "\n");
3178 fprintf (dump_file, "\n");
3180 fprintf (dump_file, "\n");
3184 /* Determines cost of the candidate CAND. */
3186 static void
3187 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3189 unsigned cost_base, cost_step;
3190 tree base, last;
3191 basic_block bb;
3193 if (!cand->iv)
3195 cand->cost = 0;
3196 return;
3199 /* There are two costs associated with the candidate -- its increment
3200 and its initialization. The second is almost negligible for any loop
3201 that rolls enough, so we take it just very little into account. */
3203 base = cand->iv->base;
3204 cost_base = force_var_cost (data, base, NULL);
3205 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3207 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3209 /* Prefer the original iv unless we may gain something by replacing it. */
3210 if (cand->pos == IP_ORIGINAL)
3211 cand->cost--;
3213 /* Prefer not to insert statements into latch unless there are some
3214 already (so that we do not create unnecessary jumps). */
3215 if (cand->pos == IP_END)
3217 bb = ip_end_pos (data->current_loop);
3218 last = last_stmt (bb);
3220 if (!last
3221 || TREE_CODE (last) == LABEL_EXPR)
3222 cand->cost++;
3226 /* Determines costs of computation of the candidates. */
3228 static void
3229 determine_iv_costs (struct ivopts_data *data)
3231 unsigned i;
3233 if (dump_file && (dump_flags & TDF_DETAILS))
3235 fprintf (dump_file, "Candidate costs:\n");
3236 fprintf (dump_file, " cand\tcost\n");
3239 for (i = 0; i < n_iv_cands (data); i++)
3241 struct iv_cand *cand = iv_cand (data, i);
3243 determine_iv_cost (data, cand);
3245 if (dump_file && (dump_flags & TDF_DETAILS))
3246 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3249 if (dump_file && (dump_flags & TDF_DETAILS))
3250 fprintf (dump_file, "\n");
3253 /* Calculates cost for having SIZE induction variables. */
3255 static unsigned
3256 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3258 return global_cost_for_size (size,
3259 loop_data (data->current_loop)->regs_used,
3260 n_iv_uses (data));
3263 /* For each size of the induction variable set determine the penalty. */
3265 static void
3266 determine_set_costs (struct ivopts_data *data)
3268 unsigned j, n;
3269 tree phi, op;
3270 struct loop *loop = data->current_loop;
3272 /* We use the following model (definitely improvable, especially the
3273 cost function -- TODO):
3275 We estimate the number of registers available (using MD data), name it A.
3277 We estimate the number of registers used by the loop, name it U. This
3278 number is obtained as the number of loop phi nodes (not counting virtual
3279 registers and bivs) + the number of variables from outside of the loop.
3281 We set a reserve R (free regs that are used for temporary computations,
3282 etc.). For now the reserve is a constant 3.
3284 Let I be the number of induction variables.
3286 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3287 make a lot of ivs without a reason).
3288 -- if A - R < U + I <= A, the cost is I * PRES_COST
3289 -- if U + I > A, the cost is I * PRES_COST and
3290 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3292 if (dump_file && (dump_flags & TDF_DETAILS))
3294 fprintf (dump_file, "Global costs:\n");
3295 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3296 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3297 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3298 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3301 n = 0;
3302 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
3304 op = PHI_RESULT (phi);
3306 if (!is_gimple_reg (op))
3307 continue;
3309 if (get_iv (data, op))
3310 continue;
3312 n++;
3315 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j,
3317 struct version_info *info = ver_info (data, j);
3319 if (info->inv_id && info->has_nonlin_use)
3320 n++;
3323 loop_data (loop)->regs_used = n;
3324 if (dump_file && (dump_flags & TDF_DETAILS))
3325 fprintf (dump_file, " regs_used %d\n", n);
3327 if (dump_file && (dump_flags & TDF_DETAILS))
3329 fprintf (dump_file, " cost for size:\n");
3330 fprintf (dump_file, " ivs\tcost\n");
3331 for (j = 0; j <= 2 * target_avail_regs; j++)
3332 fprintf (dump_file, " %d\t%d\n", j,
3333 ivopts_global_cost_for_size (data, j));
3334 fprintf (dump_file, "\n");
3338 /* Finds a best candidate for USE and stores it to CAND. The candidates are
3339 taken from the set SOL and they may depend on invariants in the set INV.
3340 The really used candidate and invariants are noted to USED_IVS and
3341 USED_INV. */
3343 static unsigned
3344 find_best_candidate (struct ivopts_data *data,
3345 struct iv_use *use, bitmap sol, bitmap inv,
3346 bitmap used_ivs, bitmap used_inv, struct iv_cand **cand)
3348 unsigned c, d;
3349 unsigned best_cost = INFTY, cost;
3350 struct iv_cand *cnd = NULL, *acnd;
3351 bitmap depends_on = NULL, asol;
3353 if (data->consider_all_candidates)
3354 asol = sol;
3355 else
3357 asol = BITMAP_XMALLOC ();
3358 bitmap_a_and_b (asol, sol, use->related_cands);
3361 EXECUTE_IF_SET_IN_BITMAP (asol, 0, c,
3363 acnd = iv_cand (data, c);
3364 cost = get_use_iv_cost (data, use, acnd, &depends_on);
3366 if (cost == INFTY)
3367 goto next_cand;
3368 if (cost > best_cost)
3369 goto next_cand;
3370 if (cost == best_cost)
3372 /* Prefer the cheaper iv. */
3373 if (acnd->cost >= cnd->cost)
3374 goto next_cand;
3377 if (depends_on)
3379 EXECUTE_IF_AND_COMPL_IN_BITMAP (depends_on, inv, 0, d,
3380 goto next_cand);
3381 if (used_inv)
3382 bitmap_a_or_b (used_inv, used_inv, depends_on);
3385 cnd = acnd;
3386 best_cost = cost;
3387 next_cand: ;
3390 if (cnd && used_ivs)
3391 bitmap_set_bit (used_ivs, cnd->id);
3393 if (cand)
3394 *cand = cnd;
3396 if (!data->consider_all_candidates)
3397 BITMAP_XFREE (asol);
3399 return best_cost;
3402 /* Returns cost of set of ivs SOL + invariants INV. Removes unnecessary
3403 induction variable candidates and invariants from the sets. Only
3404 uses 0 .. MAX_USE - 1 are taken into account. */
3406 static unsigned
3407 set_cost_up_to (struct ivopts_data *data, bitmap sol, bitmap inv,
3408 unsigned max_use)
3410 unsigned i;
3411 unsigned cost = 0, size = 0, acost;
3412 struct iv_use *use;
3413 struct iv_cand *cand;
3414 bitmap used_ivs = BITMAP_XMALLOC (), used_inv = BITMAP_XMALLOC ();
3416 for (i = 0; i < max_use; i++)
3418 use = iv_use (data, i);
3419 acost = find_best_candidate (data, use, sol, inv,
3420 used_ivs, used_inv, NULL);
3421 if (acost == INFTY)
3423 BITMAP_XFREE (used_ivs);
3424 BITMAP_XFREE (used_inv);
3425 return INFTY;
3427 cost += acost;
3430 EXECUTE_IF_SET_IN_BITMAP (used_ivs, 0, i,
3432 cand = iv_cand (data, i);
3434 /* Do not count the pseudocandidates. */
3435 if (cand->iv)
3436 size++;
3438 cost += cand->cost;
3440 EXECUTE_IF_SET_IN_BITMAP (used_inv, 0, i, size++);
3441 cost += ivopts_global_cost_for_size (data, size);
3443 bitmap_copy (sol, used_ivs);
3444 bitmap_copy (inv, used_inv);
3446 BITMAP_XFREE (used_ivs);
3447 BITMAP_XFREE (used_inv);
3449 return cost;
3452 /* Computes cost of set of ivs SOL + invariants INV. Removes unnecessary
3453 induction variable candidates and invariants from the sets. */
3455 static unsigned
3456 set_cost (struct ivopts_data *data, bitmap sol, bitmap inv)
3458 return set_cost_up_to (data, sol, inv, n_iv_uses (data));
3461 /* Tries to extend the sets IVS and INV in the best possible way in order
3462 to express the USE. */
3464 static bool
3465 try_add_cand_for (struct ivopts_data *data, bitmap ivs, bitmap inv,
3466 struct iv_use *use)
3468 unsigned best_cost = set_cost_up_to (data, ivs, inv, use->id + 1), act_cost;
3469 bitmap best_ivs = BITMAP_XMALLOC ();
3470 bitmap best_inv = BITMAP_XMALLOC ();
3471 bitmap act_ivs = BITMAP_XMALLOC ();
3472 bitmap act_inv = BITMAP_XMALLOC ();
3473 unsigned i;
3474 struct cost_pair *cp;
3476 bitmap_copy (best_ivs, ivs);
3477 bitmap_copy (best_inv, inv);
3479 for (i = 0; i < use->n_map_members; i++)
3481 cp = use->cost_map + i;
3482 if (cp->cost == INFTY)
3483 continue;
3485 bitmap_copy (act_ivs, ivs);
3486 bitmap_set_bit (act_ivs, cp->cand->id);
3487 if (cp->depends_on)
3488 bitmap_a_or_b (act_inv, inv, cp->depends_on);
3489 else
3490 bitmap_copy (act_inv, inv);
3491 act_cost = set_cost_up_to (data, act_ivs, act_inv, use->id + 1);
3493 if (act_cost < best_cost)
3495 best_cost = act_cost;
3496 bitmap_copy (best_ivs, act_ivs);
3497 bitmap_copy (best_inv, act_inv);
3501 bitmap_copy (ivs, best_ivs);
3502 bitmap_copy (inv, best_inv);
3504 BITMAP_XFREE (best_ivs);
3505 BITMAP_XFREE (best_inv);
3506 BITMAP_XFREE (act_ivs);
3507 BITMAP_XFREE (act_inv);
3509 return (best_cost != INFTY);
3512 /* Finds an initial set of IVS and invariants INV. We do this by simply
3513 choosing the best candidate for each use. */
3515 static unsigned
3516 get_initial_solution (struct ivopts_data *data, bitmap ivs, bitmap inv)
3518 unsigned i;
3520 for (i = 0; i < n_iv_uses (data); i++)
3521 if (!try_add_cand_for (data, ivs, inv, iv_use (data, i)))
3522 return INFTY;
3524 return set_cost (data, ivs, inv);
3527 /* Tries to improve set of induction variables IVS and invariants INV to get
3528 it better than COST. */
3530 static bool
3531 try_improve_iv_set (struct ivopts_data *data,
3532 bitmap ivs, bitmap inv, unsigned *cost)
3534 unsigned i, acost;
3535 bitmap new_ivs = BITMAP_XMALLOC (), new_inv = BITMAP_XMALLOC ();
3536 bitmap best_new_ivs = NULL, best_new_inv = NULL;
3538 /* Try altering the set of induction variables by one. */
3539 for (i = 0; i < n_iv_cands (data); i++)
3541 bitmap_copy (new_ivs, ivs);
3542 bitmap_copy (new_inv, inv);
3544 if (bitmap_bit_p (ivs, i))
3545 bitmap_clear_bit (new_ivs, i);
3546 else
3547 bitmap_set_bit (new_ivs, i);
3549 acost = set_cost (data, new_ivs, new_inv);
3550 if (acost >= *cost)
3551 continue;
3553 if (!best_new_ivs)
3555 best_new_ivs = BITMAP_XMALLOC ();
3556 best_new_inv = BITMAP_XMALLOC ();
3559 *cost = acost;
3560 bitmap_copy (best_new_ivs, new_ivs);
3561 bitmap_copy (best_new_inv, new_inv);
3564 /* Ditto for invariants. */
3565 for (i = 1; i <= data->max_inv_id; i++)
3567 if (ver_info (data, i)->has_nonlin_use)
3568 continue;
3570 bitmap_copy (new_ivs, ivs);
3571 bitmap_copy (new_inv, inv);
3573 if (bitmap_bit_p (inv, i))
3574 bitmap_clear_bit (new_inv, i);
3575 else
3576 bitmap_set_bit (new_inv, i);
3578 acost = set_cost (data, new_ivs, new_inv);
3579 if (acost >= *cost)
3580 continue;
3582 if (!best_new_ivs)
3584 best_new_ivs = BITMAP_XMALLOC ();
3585 best_new_inv = BITMAP_XMALLOC ();
3588 *cost = acost;
3589 bitmap_copy (best_new_ivs, new_ivs);
3590 bitmap_copy (best_new_inv, new_inv);
3593 BITMAP_XFREE (new_ivs);
3594 BITMAP_XFREE (new_inv);
3596 if (!best_new_ivs)
3597 return false;
3599 bitmap_copy (ivs, best_new_ivs);
3600 bitmap_copy (inv, best_new_inv);
3601 BITMAP_XFREE (best_new_ivs);
3602 BITMAP_XFREE (best_new_inv);
3603 return true;
3606 /* Attempts to find the optimal set of induction variables. We do simple
3607 greedy heuristic -- we try to replace at most one candidate in the selected
3608 solution and remove the unused ivs while this improves the cost. */
3610 static bitmap
3611 find_optimal_iv_set (struct ivopts_data *data)
3613 unsigned cost, i;
3614 bitmap set = BITMAP_XMALLOC ();
3615 bitmap inv = BITMAP_XMALLOC ();
3616 struct iv_use *use;
3618 /* Set the upper bound. */
3619 cost = get_initial_solution (data, set, inv);
3620 if (cost == INFTY)
3622 if (dump_file && (dump_flags & TDF_DETAILS))
3623 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
3624 BITMAP_XFREE (inv);
3625 BITMAP_XFREE (set);
3626 return NULL;
3629 if (dump_file && (dump_flags & TDF_DETAILS))
3631 fprintf (dump_file, "Initial set of candidates (cost %d): ", cost);
3632 bitmap_print (dump_file, set, "", "");
3633 fprintf (dump_file, " invariants ");
3634 bitmap_print (dump_file, inv, "", "");
3635 fprintf (dump_file, "\n");
3638 while (try_improve_iv_set (data, set, inv, &cost))
3640 if (dump_file && (dump_flags & TDF_DETAILS))
3642 fprintf (dump_file, "Improved to (cost %d): ", cost);
3643 bitmap_print (dump_file, set, "", "");
3644 fprintf (dump_file, " invariants ");
3645 bitmap_print (dump_file, inv, "", "");
3646 fprintf (dump_file, "\n");
3650 if (dump_file && (dump_flags & TDF_DETAILS))
3651 fprintf (dump_file, "Final cost %d\n\n", cost);
3653 for (i = 0; i < n_iv_uses (data); i++)
3655 use = iv_use (data, i);
3656 find_best_candidate (data, use, set, inv, NULL, NULL, &use->selected);
3659 BITMAP_XFREE (inv);
3661 return set;
3664 /* Creates a new induction variable corresponding to CAND. */
3666 static void
3667 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
3669 block_stmt_iterator incr_pos;
3670 tree base;
3671 bool after = false;
3673 if (!cand->iv)
3674 return;
3676 switch (cand->pos)
3678 case IP_NORMAL:
3679 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
3680 break;
3682 case IP_END:
3683 incr_pos = bsi_last (ip_end_pos (data->current_loop));
3684 after = true;
3685 break;
3687 case IP_ORIGINAL:
3688 /* Mark that the iv is preserved. */
3689 name_info (data, cand->var_before)->preserve_biv = true;
3690 name_info (data, cand->var_after)->preserve_biv = true;
3692 /* Rewrite the increment so that it uses var_before directly. */
3693 find_interesting_uses_op (data, cand->var_after)->selected = cand;
3695 return;
3698 gimple_add_tmp_var (cand->var_before);
3699 add_referenced_tmp_var (cand->var_before);
3701 base = unshare_expr (cand->iv->base);
3703 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
3704 &incr_pos, after, &cand->var_before, &cand->var_after);
3707 /* Creates new induction variables described in SET. */
3709 static void
3710 create_new_ivs (struct ivopts_data *data, bitmap set)
3712 unsigned i;
3713 struct iv_cand *cand;
3715 EXECUTE_IF_SET_IN_BITMAP (set, 0, i,
3717 cand = iv_cand (data, i);
3718 create_new_iv (data, cand);
3722 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
3723 is true, remove also the ssa name defined by the statement. */
3725 static void
3726 remove_statement (tree stmt, bool including_defined_name)
3728 if (TREE_CODE (stmt) == PHI_NODE)
3730 if (!including_defined_name)
3732 /* Prevent the ssa name defined by the statement from being removed. */
3733 SET_PHI_RESULT (stmt, NULL);
3735 remove_phi_node (stmt, NULL_TREE, bb_for_stmt (stmt));
3737 else
3739 block_stmt_iterator bsi = stmt_for_bsi (stmt);
3741 bsi_remove (&bsi);
3745 /* Rewrites USE (definition of iv used in a nonlinear expression)
3746 using candidate CAND. */
3748 static void
3749 rewrite_use_nonlinear_expr (struct ivopts_data *data,
3750 struct iv_use *use, struct iv_cand *cand)
3752 tree comp = unshare_expr (get_computation (data->current_loop,
3753 use, cand));
3754 tree op, stmts, tgt, ass;
3755 block_stmt_iterator bsi, pbsi;
3757 switch (TREE_CODE (use->stmt))
3759 case PHI_NODE:
3760 tgt = PHI_RESULT (use->stmt);
3762 /* If we should keep the biv, do not replace it. */
3763 if (name_info (data, tgt)->preserve_biv)
3764 return;
3766 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
3767 while (!bsi_end_p (pbsi)
3768 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
3770 bsi = pbsi;
3771 bsi_next (&pbsi);
3773 break;
3775 case MODIFY_EXPR:
3776 tgt = TREE_OPERAND (use->stmt, 0);
3777 bsi = stmt_for_bsi (use->stmt);
3778 break;
3780 default:
3781 gcc_unreachable ();
3784 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
3786 if (TREE_CODE (use->stmt) == PHI_NODE)
3788 if (stmts)
3789 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
3790 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
3791 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
3792 remove_statement (use->stmt, false);
3793 SSA_NAME_DEF_STMT (tgt) = ass;
3795 else
3797 if (stmts)
3798 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3799 TREE_OPERAND (use->stmt, 1) = op;
3803 /* Replaces ssa name in index IDX by its basic variable. Callback for
3804 for_each_index. */
3806 static bool
3807 idx_remove_ssa_names (tree base ATTRIBUTE_UNUSED, tree *idx,
3808 void *data ATTRIBUTE_UNUSED)
3810 if (TREE_CODE (*idx) == SSA_NAME)
3811 *idx = SSA_NAME_VAR (*idx);
3812 return true;
3815 /* Unshares REF and replaces ssa names inside it by their basic variables. */
3817 static tree
3818 unshare_and_remove_ssa_names (tree ref)
3820 ref = unshare_expr (ref);
3821 for_each_index (&ref, idx_remove_ssa_names, NULL);
3823 return ref;
3826 /* Rewrites base of memory access OP with expression WITH in statement
3827 pointed to by BSI. */
3829 static void
3830 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
3832 tree var = get_base_address (*op), new_var, new_name, copy, name;
3833 tree orig;
3835 if (!var || TREE_CODE (with) != SSA_NAME)
3836 goto do_rewrite;
3838 if (TREE_CODE (var) == INDIRECT_REF)
3839 var = TREE_OPERAND (var, 0);
3840 if (TREE_CODE (var) == SSA_NAME)
3842 name = var;
3843 var = SSA_NAME_VAR (var);
3845 else if (DECL_P (var))
3846 name = NULL_TREE;
3847 else
3848 goto do_rewrite;
3850 if (var_ann (var)->type_mem_tag)
3851 var = var_ann (var)->type_mem_tag;
3853 /* We need to add a memory tag for the variable. But we do not want
3854 to add it to the temporary used for the computations, since this leads
3855 to problems in redundancy elimination when there are common parts
3856 in two computations referring to the different arrays. So we copy
3857 the variable to a new temporary. */
3858 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
3859 if (name)
3860 new_name = duplicate_ssa_name (name, copy);
3861 else
3863 new_var = create_tmp_var (TREE_TYPE (with), "ruatmp");
3864 add_referenced_tmp_var (new_var);
3865 var_ann (new_var)->type_mem_tag = var;
3866 new_name = make_ssa_name (new_var, copy);
3868 TREE_OPERAND (copy, 0) = new_name;
3869 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
3870 with = new_name;
3872 do_rewrite:
3874 orig = NULL_TREE;
3875 if (TREE_CODE (*op) == INDIRECT_REF)
3876 orig = REF_ORIGINAL (*op);
3877 if (!orig)
3878 orig = unshare_and_remove_ssa_names (*op);
3880 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
3881 /* Record the original reference, for purposes of alias analysis. */
3882 REF_ORIGINAL (*op) = orig;
3885 /* Rewrites USE (address that is an iv) using candidate CAND. */
3887 static void
3888 rewrite_use_address (struct ivopts_data *data,
3889 struct iv_use *use, struct iv_cand *cand)
3891 tree comp = unshare_expr (get_computation (data->current_loop,
3892 use, cand));
3893 block_stmt_iterator bsi = stmt_for_bsi (use->stmt);
3894 tree stmts;
3895 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
3897 if (stmts)
3898 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3900 rewrite_address_base (&bsi, use->op_p, op);
3903 /* Rewrites USE (the condition such that one of the arguments is an iv) using
3904 candidate CAND. */
3906 static void
3907 rewrite_use_compare (struct ivopts_data *data,
3908 struct iv_use *use, struct iv_cand *cand)
3910 tree comp;
3911 tree *op_p, cond, op, stmts, bound;
3912 block_stmt_iterator bsi = stmt_for_bsi (use->stmt);
3913 enum tree_code compare;
3915 if (may_eliminate_iv (data->current_loop,
3916 use, cand, &compare, &bound))
3918 op = force_gimple_operand (unshare_expr (bound), &stmts,
3919 true, NULL_TREE);
3921 if (stmts)
3922 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3924 *use->op_p = build2 (compare, boolean_type_node,
3925 var_at_stmt (data->current_loop,
3926 cand, use->stmt), op);
3927 modify_stmt (use->stmt);
3928 return;
3931 /* The induction variable elimination failed; just express the original
3932 giv. */
3933 comp = unshare_expr (get_computation (data->current_loop, use, cand));
3935 cond = *use->op_p;
3936 op_p = &TREE_OPERAND (cond, 0);
3937 if (TREE_CODE (*op_p) != SSA_NAME
3938 || zero_p (get_iv (data, *op_p)->step))
3939 op_p = &TREE_OPERAND (cond, 1);
3941 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
3942 if (stmts)
3943 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3945 *op_p = op;
3948 /* Ensure that operand *OP_P may be used at the end of EXIT without
3949 violating loop closed ssa form. */
3951 static void
3952 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
3954 basic_block def_bb;
3955 struct loop *def_loop;
3956 tree phi, use;
3958 use = USE_FROM_PTR (op_p);
3959 if (TREE_CODE (use) != SSA_NAME)
3960 return;
3962 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
3963 if (!def_bb)
3964 return;
3966 def_loop = def_bb->loop_father;
3967 if (flow_bb_inside_loop_p (def_loop, exit->dest))
3968 return;
3970 /* Try finding a phi node that copies the value out of the loop. */
3971 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
3972 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
3973 break;
3975 if (!phi)
3977 /* Create such a phi node. */
3978 tree new_name = duplicate_ssa_name (use, NULL);
3980 phi = create_phi_node (new_name, exit->dest);
3981 SSA_NAME_DEF_STMT (new_name) = phi;
3982 add_phi_arg (&phi, use, exit);
3985 SET_USE (op_p, PHI_RESULT (phi));
3988 /* Ensure that operands of STMT may be used at the end of EXIT without
3989 violating loop closed ssa form. */
3991 static void
3992 protect_loop_closed_ssa_form (edge exit, tree stmt)
3994 use_optype uses;
3995 vuse_optype vuses;
3996 v_may_def_optype v_may_defs;
3997 unsigned i;
3999 get_stmt_operands (stmt);
4001 uses = STMT_USE_OPS (stmt);
4002 for (i = 0; i < NUM_USES (uses); i++)
4003 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4005 vuses = STMT_VUSE_OPS (stmt);
4006 for (i = 0; i < NUM_VUSES (vuses); i++)
4007 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
4009 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4010 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
4011 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
4014 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
4015 so that they are emitted on the correct place, and so that the loop closed
4016 ssa form is preserved. */
4018 static void
4019 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
4021 tree_stmt_iterator tsi;
4022 block_stmt_iterator bsi;
4023 tree phi, stmt, def, next;
4025 if (EDGE_COUNT (exit->dest->preds) > 1)
4026 split_loop_exit_edge (exit);
4028 if (TREE_CODE (stmts) == STATEMENT_LIST)
4030 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
4031 protect_loop_closed_ssa_form (exit, tsi_stmt (tsi));
4033 else
4034 protect_loop_closed_ssa_form (exit, stmts);
4036 /* Ensure there is label in exit->dest, so that we can
4037 insert after it. */
4038 tree_block_label (exit->dest);
4039 bsi = bsi_after_labels (exit->dest);
4040 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4042 if (!op)
4043 return;
4045 for (phi = phi_nodes (exit->dest); phi; phi = next)
4047 next = TREE_CHAIN (phi);
4049 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
4051 def = PHI_RESULT (phi);
4052 remove_statement (phi, false);
4053 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
4054 def, op);
4055 SSA_NAME_DEF_STMT (def) = stmt;
4056 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
4061 /* Rewrites the final value of USE (that is only needed outside of the loop)
4062 using candidate CAND. */
4064 static void
4065 rewrite_use_outer (struct ivopts_data *data,
4066 struct iv_use *use, struct iv_cand *cand)
4068 edge exit;
4069 tree value, op, stmts, tgt;
4070 tree phi;
4072 switch (TREE_CODE (use->stmt))
4074 case PHI_NODE:
4075 tgt = PHI_RESULT (use->stmt);
4076 break;
4077 case MODIFY_EXPR:
4078 tgt = TREE_OPERAND (use->stmt, 0);
4079 break;
4080 default:
4081 gcc_unreachable ();
4084 exit = single_dom_exit (data->current_loop);
4086 if (exit)
4088 if (!cand->iv)
4090 bool ok = may_replace_final_value (data->current_loop, use, &value);
4091 gcc_assert (ok);
4093 else
4094 value = get_computation_at (data->current_loop,
4095 use, cand, last_stmt (exit->src));
4097 value = unshare_expr (value);
4098 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
4100 /* If we will preserve the iv anyway and we would need to perform
4101 some computation to replace the final value, do nothing. */
4102 if (stmts && name_info (data, tgt)->preserve_biv)
4103 return;
4105 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
4107 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
4109 if (USE_FROM_PTR (use_p) == tgt)
4110 SET_USE (use_p, op);
4113 if (stmts)
4114 compute_phi_arg_on_exit (exit, stmts, op);
4116 /* Enable removal of the statement. We cannot remove it directly,
4117 since we may still need the aliasing information attached to the
4118 ssa name defined by it. */
4119 name_info (data, tgt)->iv->have_use_for = false;
4120 return;
4123 /* If the variable is going to be preserved anyway, there is nothing to
4124 do. */
4125 if (name_info (data, tgt)->preserve_biv)
4126 return;
4128 /* Otherwise we just need to compute the iv. */
4129 rewrite_use_nonlinear_expr (data, use, cand);
4132 /* Rewrites USE using candidate CAND. */
4134 static void
4135 rewrite_use (struct ivopts_data *data,
4136 struct iv_use *use, struct iv_cand *cand)
4138 switch (use->type)
4140 case USE_NONLINEAR_EXPR:
4141 rewrite_use_nonlinear_expr (data, use, cand);
4142 break;
4144 case USE_OUTER:
4145 rewrite_use_outer (data, use, cand);
4146 break;
4148 case USE_ADDRESS:
4149 rewrite_use_address (data, use, cand);
4150 break;
4152 case USE_COMPARE:
4153 rewrite_use_compare (data, use, cand);
4154 break;
4156 default:
4157 gcc_unreachable ();
4159 modify_stmt (use->stmt);
4162 /* Rewrite the uses using the selected induction variables. */
4164 static void
4165 rewrite_uses (struct ivopts_data *data)
4167 unsigned i;
4168 struct iv_cand *cand;
4169 struct iv_use *use;
4171 for (i = 0; i < n_iv_uses (data); i++)
4173 use = iv_use (data, i);
4174 cand = use->selected;
4175 gcc_assert (cand);
4177 rewrite_use (data, use, cand);
4181 /* Removes the ivs that are not used after rewriting. */
4183 static void
4184 remove_unused_ivs (struct ivopts_data *data)
4186 unsigned j;
4188 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j,
4190 struct version_info *info;
4192 info = ver_info (data, j);
4193 if (info->iv
4194 && !zero_p (info->iv->step)
4195 && !info->inv_id
4196 && !info->iv->have_use_for
4197 && !info->preserve_biv)
4198 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
4202 /* Frees data allocated by the optimization of a single loop. */
4204 static void
4205 free_loop_data (struct ivopts_data *data)
4207 unsigned i, j;
4209 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
4211 struct version_info *info;
4213 info = ver_info (data, i);
4214 if (info->iv)
4215 free (info->iv);
4216 info->iv = NULL;
4217 info->has_nonlin_use = false;
4218 info->preserve_biv = false;
4219 info->inv_id = 0;
4221 bitmap_clear (data->relevant);
4223 for (i = 0; i < n_iv_uses (data); i++)
4225 struct iv_use *use = iv_use (data, i);
4227 free (use->iv);
4228 BITMAP_XFREE (use->related_cands);
4229 for (j = 0; j < use->n_map_members; j++)
4230 if (use->cost_map[j].depends_on)
4231 BITMAP_XFREE (use->cost_map[j].depends_on);
4232 free (use->cost_map);
4233 free (use);
4235 VARRAY_POP_ALL (data->iv_uses);
4237 for (i = 0; i < n_iv_cands (data); i++)
4239 struct iv_cand *cand = iv_cand (data, i);
4241 if (cand->iv)
4242 free (cand->iv);
4243 free (cand);
4245 VARRAY_POP_ALL (data->iv_candidates);
4247 if (data->version_info_size < num_ssa_names)
4249 data->version_info_size = 2 * num_ssa_names;
4250 free (data->version_info);
4251 data->version_info = xcalloc (data->version_info_size,
4252 sizeof (struct version_info));
4255 data->max_inv_id = 0;
4257 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
4259 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
4261 SET_DECL_RTL (obj, NULL_RTX);
4263 VARRAY_POP_ALL (decl_rtl_to_reset);
4266 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
4267 loop tree. */
4269 static void
4270 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
4272 unsigned i;
4274 for (i = 1; i < loops->num; i++)
4275 if (loops->parray[i])
4277 free (loops->parray[i]->aux);
4278 loops->parray[i]->aux = NULL;
4281 free_loop_data (data);
4282 free (data->version_info);
4283 BITMAP_XFREE (data->relevant);
4285 VARRAY_FREE (decl_rtl_to_reset);
4286 VARRAY_FREE (data->iv_uses);
4287 VARRAY_FREE (data->iv_candidates);
4290 /* Optimizes the LOOP. Returns true if anything changed. */
4292 static bool
4293 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
4295 bool changed = false;
4296 bitmap iv_set;
4297 edge exit;
4299 data->current_loop = loop;
4301 if (dump_file && (dump_flags & TDF_DETAILS))
4303 fprintf (dump_file, "Processing loop %d\n", loop->num);
4305 exit = single_dom_exit (loop);
4306 if (exit)
4308 fprintf (dump_file, " single exit %d -> %d, exit condition ",
4309 exit->src->index, exit->dest->index);
4310 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
4311 fprintf (dump_file, "\n");
4314 fprintf (dump_file, "\n");
4317 /* For each ssa name determines whether it behaves as an induction variable
4318 in some loop. */
4319 if (!find_induction_variables (data))
4320 goto finish;
4322 /* Finds interesting uses (item 1). */
4323 find_interesting_uses (data);
4324 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
4325 goto finish;
4327 /* Finds candidates for the induction variables (item 2). */
4328 find_iv_candidates (data);
4330 /* Calculates the costs (item 3, part 1). */
4331 determine_use_iv_costs (data);
4332 determine_iv_costs (data);
4333 determine_set_costs (data);
4335 /* Find the optimal set of induction variables (item 3, part 2). */
4336 iv_set = find_optimal_iv_set (data);
4337 if (!iv_set)
4338 goto finish;
4339 changed = true;
4341 /* Create the new induction variables (item 4, part 1). */
4342 create_new_ivs (data, iv_set);
4344 /* Rewrite the uses (item 4, part 2). */
4345 rewrite_uses (data);
4347 /* Remove the ivs that are unused after rewriting. */
4348 remove_unused_ivs (data);
4350 loop_commit_inserts ();
4352 BITMAP_XFREE (iv_set);
4354 /* We have changed the structure of induction variables; it might happen
4355 that definitions in the scev database refer to some of them that were
4356 eliminated. */
4357 scev_reset ();
4359 finish:
4360 free_loop_data (data);
4362 return changed;
4365 /* Main entry point. Optimizes induction variables in LOOPS. */
4367 void
4368 tree_ssa_iv_optimize (struct loops *loops)
4370 struct loop *loop;
4371 struct ivopts_data data;
4373 tree_ssa_iv_optimize_init (loops, &data);
4375 /* Optimize the loops starting with the innermost ones. */
4376 loop = loops->tree_root;
4377 while (loop->inner)
4378 loop = loop->inner;
4380 #ifdef ENABLE_CHECKING
4381 verify_loop_closed_ssa ();
4382 verify_stmts ();
4383 #endif
4385 /* Scan the loops, inner ones first. */
4386 while (loop != loops->tree_root)
4388 if (dump_file && (dump_flags & TDF_DETAILS))
4389 flow_loop_dump (loop, dump_file, NULL, 1);
4390 if (tree_ssa_iv_optimize_loop (&data, loop))
4392 #ifdef ENABLE_CHECKING
4393 verify_loop_closed_ssa ();
4394 verify_stmts ();
4395 #endif
4398 if (loop->next)
4400 loop = loop->next;
4401 while (loop->inner)
4402 loop = loop->inner;
4404 else
4405 loop = loop->outer;
4408 tree_ssa_iv_optimize_finalize (loops, &data);