* common.opt (flag_gcse_sm): Disable by default.
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blob33c275fda3b54b371b486adfe4a059e3deff7097
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
92 /* The infinite cost. */
93 #define INFTY 10000000
95 /* The expected number of loop iterations. TODO -- use profiling instead of
96 this. */
97 #define AVG_LOOP_NITER(LOOP) 5
99 /* Just to shorten the ugly names. */
100 #define EXEC_BINARY nondestructive_fold_binary_to_constant
101 #define EXEC_UNARY nondestructive_fold_unary_to_constant
103 /* Representation of the induction variable. */
104 struct iv
106 tree base; /* Initial value of the iv. */
107 tree step; /* Step of the iv (constant only). */
108 tree ssa_name; /* The ssa name with the value. */
109 bool biv_p; /* Is it a biv? */
110 bool have_use_for; /* Do we already have a use for it? */
111 unsigned use_id; /* The identifier in the use if it is the case. */
114 /* Per-ssa version information (induction variable descriptions, etc.). */
115 struct version_info
117 tree name; /* The ssa name. */
118 struct iv *iv; /* Induction variable description. */
119 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
120 an expression that is not an induction variable. */
121 unsigned inv_id; /* Id of an invariant. */
122 bool preserve_biv; /* For the original biv, whether to preserve it. */
125 /* Information attached to loop. */
126 struct loop_data
128 struct tree_niter_desc niter;
129 /* Number of iterations. */
131 unsigned regs_used; /* Number of registers used. */
134 /* Types of uses. */
135 enum use_type
137 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
138 USE_OUTER, /* The induction variable is used outside the loop. */
139 USE_ADDRESS, /* Use in an address. */
140 USE_COMPARE /* Use is a compare. */
143 /* The candidate - cost pair. */
144 struct cost_pair
146 struct iv_cand *cand; /* The candidate. */
147 unsigned cost; /* The cost. */
148 bitmap depends_on; /* The list of invariants that have to be
149 preserved. */
152 /* Use. */
153 struct iv_use
155 unsigned id; /* The id of the use. */
156 enum use_type type; /* Type of the use. */
157 struct iv *iv; /* The induction variable it is based on. */
158 tree stmt; /* Statement in that it occurs. */
159 tree *op_p; /* The place where it occurs. */
160 bitmap related_cands; /* The set of "related" iv candidates. */
162 unsigned n_map_members; /* Number of candidates in the cost_map list. */
163 struct cost_pair *cost_map;
164 /* The costs wrto the iv candidates. */
166 struct iv_cand *selected;
167 /* The selected candidate. */
170 /* The position where the iv is computed. */
171 enum iv_position
173 IP_NORMAL, /* At the end, just before the exit condition. */
174 IP_END, /* At the end of the latch block. */
175 IP_ORIGINAL /* The original biv. */
178 /* The induction variable candidate. */
179 struct iv_cand
181 unsigned id; /* The number of the candidate. */
182 bool important; /* Whether this is an "important" candidate, i.e. such
183 that it should be considered by all uses. */
184 enum iv_position pos; /* Where it is computed. */
185 tree incremented_at; /* For original biv, the statement where it is
186 incremented. */
187 tree var_before; /* The variable used for it before increment. */
188 tree var_after; /* The variable used for it after increment. */
189 struct iv *iv; /* The value of the candidate. NULL for
190 "pseudocandidate" used to indicate the possibility
191 to replace the final value of an iv by direct
192 computation of the value. */
193 unsigned cost; /* Cost of the candidate. */
196 /* The data used by the induction variable optimizations. */
198 struct ivopts_data
200 /* The currently optimized loop. */
201 struct loop *current_loop;
203 /* The size of version_info array allocated. */
204 unsigned version_info_size;
206 /* The array of information for the ssa names. */
207 struct version_info *version_info;
209 /* The bitmap of indices in version_info whose value was changed. */
210 bitmap relevant;
212 /* The maximum invariant id. */
213 unsigned max_inv_id;
215 /* The uses of induction variables. */
216 varray_type iv_uses;
218 /* The candidates. */
219 varray_type iv_candidates;
221 /* Whether to consider just related and important candidates when replacing a
222 use. */
223 bool consider_all_candidates;
226 /* Bound on number of candidates below that all candidates are considered. */
228 #define CONSIDER_ALL_CANDIDATES_BOUND \
229 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
231 /* If there are more iv occurrences, we just give up (it is quite unlikely that
232 optimizing such a loop would help, and it would take ages). */
234 #define MAX_CONSIDERED_USES \
235 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
237 /* The list of trees for that the decl_rtl field must be reset is stored
238 here. */
240 static varray_type decl_rtl_to_reset;
242 /* Number of uses recorded in DATA. */
244 static inline unsigned
245 n_iv_uses (struct ivopts_data *data)
247 return VARRAY_ACTIVE_SIZE (data->iv_uses);
250 /* Ith use recorded in DATA. */
252 static inline struct iv_use *
253 iv_use (struct ivopts_data *data, unsigned i)
255 return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
258 /* Number of candidates recorded in DATA. */
260 static inline unsigned
261 n_iv_cands (struct ivopts_data *data)
263 return VARRAY_ACTIVE_SIZE (data->iv_candidates);
266 /* Ith candidate recorded in DATA. */
268 static inline struct iv_cand *
269 iv_cand (struct ivopts_data *data, unsigned i)
271 return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
274 /* The data for LOOP. */
276 static inline struct loop_data *
277 loop_data (struct loop *loop)
279 return loop->aux;
282 /* The single loop exit if it dominates the latch, NULL otherwise. */
284 static edge
285 single_dom_exit (struct loop *loop)
287 edge exit = loop->single_exit;
289 if (!exit)
290 return NULL;
292 if (!just_once_each_iteration_p (loop, exit->src))
293 return NULL;
295 return exit;
298 /* Dumps information about the induction variable IV to FILE. */
300 extern void dump_iv (FILE *, struct iv *);
301 void
302 dump_iv (FILE *file, struct iv *iv)
304 fprintf (file, "ssa name ");
305 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
306 fprintf (file, "\n");
308 fprintf (file, " type ");
309 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
310 fprintf (file, "\n");
312 if (iv->step)
314 fprintf (file, " base ");
315 print_generic_expr (file, iv->base, TDF_SLIM);
316 fprintf (file, "\n");
318 fprintf (file, " step ");
319 print_generic_expr (file, iv->step, TDF_SLIM);
320 fprintf (file, "\n");
322 else
324 fprintf (file, " invariant ");
325 print_generic_expr (file, iv->base, TDF_SLIM);
326 fprintf (file, "\n");
329 if (iv->biv_p)
330 fprintf (file, " is a biv\n");
333 /* Dumps information about the USE to FILE. */
335 extern void dump_use (FILE *, struct iv_use *);
336 void
337 dump_use (FILE *file, struct iv_use *use)
339 struct iv *iv = use->iv;
341 fprintf (file, "use %d\n", use->id);
343 switch (use->type)
345 case USE_NONLINEAR_EXPR:
346 fprintf (file, " generic\n");
347 break;
349 case USE_OUTER:
350 fprintf (file, " outside\n");
351 break;
353 case USE_ADDRESS:
354 fprintf (file, " address\n");
355 break;
357 case USE_COMPARE:
358 fprintf (file, " compare\n");
359 break;
361 default:
362 gcc_unreachable ();
365 fprintf (file, " in statement ");
366 print_generic_expr (file, use->stmt, TDF_SLIM);
367 fprintf (file, "\n");
369 fprintf (file, " at position ");
370 if (use->op_p)
371 print_generic_expr (file, *use->op_p, TDF_SLIM);
372 fprintf (file, "\n");
374 fprintf (file, " type ");
375 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
376 fprintf (file, "\n");
378 if (iv->step)
380 fprintf (file, " base ");
381 print_generic_expr (file, iv->base, TDF_SLIM);
382 fprintf (file, "\n");
384 fprintf (file, " step ");
385 print_generic_expr (file, iv->step, TDF_SLIM);
386 fprintf (file, "\n");
388 else
390 fprintf (file, " invariant ");
391 print_generic_expr (file, iv->base, TDF_SLIM);
392 fprintf (file, "\n");
395 fprintf (file, " related candidates ");
396 dump_bitmap (file, use->related_cands);
399 /* Dumps information about the uses to FILE. */
401 extern void dump_uses (FILE *, struct ivopts_data *);
402 void
403 dump_uses (FILE *file, struct ivopts_data *data)
405 unsigned i;
406 struct iv_use *use;
408 for (i = 0; i < n_iv_uses (data); i++)
410 use = iv_use (data, i);
412 dump_use (file, use);
413 fprintf (file, "\n");
417 /* Dumps information about induction variable candidate CAND to FILE. */
419 extern void dump_cand (FILE *, struct iv_cand *);
420 void
421 dump_cand (FILE *file, struct iv_cand *cand)
423 struct iv *iv = cand->iv;
425 fprintf (file, "candidate %d%s\n",
426 cand->id, cand->important ? " (important)" : "");
428 if (!iv)
430 fprintf (file, " final value replacement\n");
431 return;
434 switch (cand->pos)
436 case IP_NORMAL:
437 fprintf (file, " incremented before exit test\n");
438 break;
440 case IP_END:
441 fprintf (file, " incremented at end\n");
442 break;
444 case IP_ORIGINAL:
445 fprintf (file, " original biv\n");
446 break;
449 fprintf (file, " type ");
450 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
451 fprintf (file, "\n");
453 if (iv->step)
455 fprintf (file, " base ");
456 print_generic_expr (file, iv->base, TDF_SLIM);
457 fprintf (file, "\n");
459 fprintf (file, " step ");
460 print_generic_expr (file, iv->step, TDF_SLIM);
461 fprintf (file, "\n");
463 else
465 fprintf (file, " invariant ");
466 print_generic_expr (file, iv->base, TDF_SLIM);
467 fprintf (file, "\n");
471 /* Returns the info for ssa version VER. */
473 static inline struct version_info *
474 ver_info (struct ivopts_data *data, unsigned ver)
476 return data->version_info + ver;
479 /* Returns the info for ssa name NAME. */
481 static inline struct version_info *
482 name_info (struct ivopts_data *data, tree name)
484 return ver_info (data, SSA_NAME_VERSION (name));
487 /* Checks whether there exists number X such that X * B = A, counting modulo
488 2^BITS. */
490 static bool
491 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
492 HOST_WIDE_INT *x)
494 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
495 unsigned HOST_WIDE_INT inv, ex, val;
496 unsigned i;
498 a &= mask;
499 b &= mask;
501 /* First divide the whole equation by 2 as long as possible. */
502 while (!(a & 1) && !(b & 1))
504 a >>= 1;
505 b >>= 1;
506 bits--;
507 mask >>= 1;
510 if (!(b & 1))
512 /* If b is still even, a is odd and there is no such x. */
513 return false;
516 /* Find the inverse of b. We compute it as
517 b^(2^(bits - 1) - 1) (mod 2^bits). */
518 inv = 1;
519 ex = b;
520 for (i = 0; i < bits - 1; i++)
522 inv = (inv * ex) & mask;
523 ex = (ex * ex) & mask;
526 val = (a * inv) & mask;
528 gcc_assert (((val * b) & mask) == a);
530 if ((val >> (bits - 1)) & 1)
531 val |= ~mask;
533 *x = val;
535 return true;
538 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
539 emitted in LOOP. */
541 static bool
542 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
544 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
546 gcc_assert (bb);
548 if (sbb == loop->latch)
549 return true;
551 if (sbb != bb)
552 return false;
554 return stmt == last_stmt (bb);
557 /* Returns true if STMT if after the place where the original induction
558 variable CAND is incremented. */
560 static bool
561 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
563 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
564 basic_block stmt_bb = bb_for_stmt (stmt);
565 block_stmt_iterator bsi;
567 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
568 return false;
570 if (stmt_bb != cand_bb)
571 return true;
573 /* Scan the block from the end, since the original ivs are usually
574 incremented at the end of the loop body. */
575 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
577 if (bsi_stmt (bsi) == cand->incremented_at)
578 return false;
579 if (bsi_stmt (bsi) == stmt)
580 return true;
584 /* Returns true if STMT if after the place where the induction variable
585 CAND is incremented in LOOP. */
587 static bool
588 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
590 switch (cand->pos)
592 case IP_END:
593 return false;
595 case IP_NORMAL:
596 return stmt_after_ip_normal_pos (loop, stmt);
598 case IP_ORIGINAL:
599 return stmt_after_ip_original_pos (cand, stmt);
601 default:
602 gcc_unreachable ();
606 /* Initializes data structures used by the iv optimization pass, stored
607 in DATA. LOOPS is the loop tree. */
609 static void
610 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
612 unsigned i;
614 data->version_info_size = 2 * num_ssa_names;
615 data->version_info = xcalloc (data->version_info_size,
616 sizeof (struct version_info));
617 data->relevant = BITMAP_XMALLOC ();
618 data->max_inv_id = 0;
620 for (i = 1; i < loops->num; i++)
621 if (loops->parray[i])
622 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
624 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
625 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
626 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
629 /* Allocates an induction variable with given initial value BASE and step STEP
630 for loop LOOP. */
632 static struct iv *
633 alloc_iv (tree base, tree step)
635 struct iv *iv = xcalloc (1, sizeof (struct iv));
637 if (step && integer_zerop (step))
638 step = NULL_TREE;
640 iv->base = base;
641 iv->step = step;
642 iv->biv_p = false;
643 iv->have_use_for = false;
644 iv->use_id = 0;
645 iv->ssa_name = NULL_TREE;
647 return iv;
650 /* Sets STEP and BASE for induction variable IV. */
652 static void
653 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
655 struct version_info *info = name_info (data, iv);
657 gcc_assert (!info->iv);
659 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
660 info->iv = alloc_iv (base, step);
661 info->iv->ssa_name = iv;
664 /* Finds induction variable declaration for VAR. */
666 static struct iv *
667 get_iv (struct ivopts_data *data, tree var)
669 basic_block bb;
671 if (!name_info (data, var)->iv)
673 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
675 if (!bb
676 || !flow_bb_inside_loop_p (data->current_loop, bb))
677 set_iv (data, var, var, NULL_TREE);
680 return name_info (data, var)->iv;
683 /* Determines the step of a biv defined in PHI. */
685 static tree
686 determine_biv_step (tree phi)
688 struct loop *loop = bb_for_stmt (phi)->loop_father;
689 tree name = PHI_RESULT (phi), base, step;
690 tree type = TREE_TYPE (name);
692 if (!is_gimple_reg (name))
693 return NULL_TREE;
695 if (!simple_iv (loop, phi, name, &base, &step))
696 return NULL_TREE;
698 if (!step)
699 return build_int_cst (type, 0);
701 return step;
704 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
706 static bool
707 abnormal_ssa_name_p (tree exp)
709 if (!exp)
710 return false;
712 if (TREE_CODE (exp) != SSA_NAME)
713 return false;
715 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
718 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
719 abnormal phi node. Callback for for_each_index. */
721 static bool
722 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
723 void *data ATTRIBUTE_UNUSED)
725 if (TREE_CODE (base) == ARRAY_REF)
727 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
728 return false;
729 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
730 return false;
733 return !abnormal_ssa_name_p (*index);
736 /* Returns true if EXPR contains a ssa name that occurs in an
737 abnormal phi node. */
739 static bool
740 contains_abnormal_ssa_name_p (tree expr)
742 enum tree_code code = TREE_CODE (expr);
743 enum tree_code_class class = TREE_CODE_CLASS (code);
745 if (code == SSA_NAME)
746 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
748 if (code == INTEGER_CST
749 || is_gimple_min_invariant (expr))
750 return false;
752 if (code == ADDR_EXPR)
753 return !for_each_index (&TREE_OPERAND (expr, 1),
754 idx_contains_abnormal_ssa_name_p,
755 NULL);
757 switch (class)
759 case tcc_binary:
760 case tcc_comparison:
761 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
762 return true;
764 /* Fallthru. */
765 case tcc_unary:
766 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
767 return true;
769 break;
771 default:
772 gcc_unreachable ();
775 return false;
778 /* Finds basic ivs. */
780 static bool
781 find_bivs (struct ivopts_data *data)
783 tree phi, step, type, base;
784 bool found = false;
785 struct loop *loop = data->current_loop;
787 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
789 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
790 continue;
792 step = determine_biv_step (phi);
794 if (!step)
795 continue;
796 if (cst_and_fits_in_hwi (step)
797 && int_cst_value (step) == 0)
798 continue;
800 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
801 if (contains_abnormal_ssa_name_p (base))
802 continue;
804 type = TREE_TYPE (PHI_RESULT (phi));
805 base = fold_convert (type, base);
806 step = fold_convert (type, step);
808 /* FIXME: We do not handle induction variables whose step does
809 not satisfy cst_and_fits_in_hwi. */
810 if (!cst_and_fits_in_hwi (step))
811 continue;
813 set_iv (data, PHI_RESULT (phi), base, step);
814 found = true;
817 return found;
820 /* Marks basic ivs. */
822 static void
823 mark_bivs (struct ivopts_data *data)
825 tree phi, var;
826 struct iv *iv, *incr_iv;
827 struct loop *loop = data->current_loop;
828 basic_block incr_bb;
830 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
832 iv = get_iv (data, PHI_RESULT (phi));
833 if (!iv)
834 continue;
836 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
837 incr_iv = get_iv (data, var);
838 if (!incr_iv)
839 continue;
841 /* If the increment is in the subloop, ignore it. */
842 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
843 if (incr_bb->loop_father != data->current_loop
844 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
845 continue;
847 iv->biv_p = true;
848 incr_iv->biv_p = true;
852 /* Checks whether STMT defines a linear induction variable and stores its
853 parameters to BASE and STEP. */
855 static bool
856 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
857 tree *base, tree *step)
859 tree lhs;
860 struct loop *loop = data->current_loop;
862 *base = NULL_TREE;
863 *step = NULL_TREE;
865 if (TREE_CODE (stmt) != MODIFY_EXPR)
866 return false;
868 lhs = TREE_OPERAND (stmt, 0);
869 if (TREE_CODE (lhs) != SSA_NAME)
870 return false;
872 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
873 return false;
875 /* FIXME: We do not handle induction variables whose step does
876 not satisfy cst_and_fits_in_hwi. */
877 if (!zero_p (*step)
878 && !cst_and_fits_in_hwi (*step))
879 return false;
881 if (contains_abnormal_ssa_name_p (*base))
882 return false;
884 return true;
887 /* Finds general ivs in statement STMT. */
889 static void
890 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
892 tree base, step;
894 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
895 return;
897 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
900 /* Finds general ivs in basic block BB. */
902 static void
903 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
905 block_stmt_iterator bsi;
907 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
908 find_givs_in_stmt (data, bsi_stmt (bsi));
911 /* Finds general ivs. */
913 static void
914 find_givs (struct ivopts_data *data)
916 struct loop *loop = data->current_loop;
917 basic_block *body = get_loop_body_in_dom_order (loop);
918 unsigned i;
920 for (i = 0; i < loop->num_nodes; i++)
921 find_givs_in_bb (data, body[i]);
922 free (body);
925 /* Determine the number of iterations of the current loop. */
927 static void
928 determine_number_of_iterations (struct ivopts_data *data)
930 struct loop *loop = data->current_loop;
931 edge exit = single_dom_exit (loop);
933 if (!exit)
934 return;
936 number_of_iterations_exit (loop, exit, &loop_data (loop)->niter);
939 /* For each ssa name defined in LOOP determines whether it is an induction
940 variable and if so, its initial value and step. */
942 static bool
943 find_induction_variables (struct ivopts_data *data)
945 unsigned i;
946 struct loop *loop = data->current_loop;
947 bitmap_iterator bi;
949 if (!find_bivs (data))
950 return false;
952 find_givs (data);
953 mark_bivs (data);
954 determine_number_of_iterations (data);
956 if (dump_file && (dump_flags & TDF_DETAILS))
958 if (loop_data (loop)->niter.niter)
960 fprintf (dump_file, " number of iterations ");
961 print_generic_expr (dump_file, loop_data (loop)->niter.niter,
962 TDF_SLIM);
963 fprintf (dump_file, "\n");
965 fprintf (dump_file, " may be zero if ");
966 print_generic_expr (dump_file, loop_data (loop)->niter.may_be_zero,
967 TDF_SLIM);
968 fprintf (dump_file, "\n");
970 fprintf (dump_file, " bogus unless ");
971 print_generic_expr (dump_file, loop_data (loop)->niter.assumptions,
972 TDF_SLIM);
973 fprintf (dump_file, "\n");
974 fprintf (dump_file, "\n");
977 fprintf (dump_file, "Induction variables:\n\n");
979 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
981 if (ver_info (data, i)->iv)
982 dump_iv (dump_file, ver_info (data, i)->iv);
986 return true;
989 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
991 static struct iv_use *
992 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
993 tree stmt, enum use_type use_type)
995 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
997 use->id = n_iv_uses (data);
998 use->type = use_type;
999 use->iv = iv;
1000 use->stmt = stmt;
1001 use->op_p = use_p;
1002 use->related_cands = BITMAP_XMALLOC ();
1004 if (dump_file && (dump_flags & TDF_DETAILS))
1005 dump_use (dump_file, use);
1007 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
1009 return use;
1012 /* Checks whether OP is a loop-level invariant and if so, records it.
1013 NONLINEAR_USE is true if the invariant is used in a way we do not
1014 handle specially. */
1016 static void
1017 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1019 basic_block bb;
1020 struct version_info *info;
1022 if (TREE_CODE (op) != SSA_NAME
1023 || !is_gimple_reg (op))
1024 return;
1026 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1027 if (bb
1028 && flow_bb_inside_loop_p (data->current_loop, bb))
1029 return;
1031 info = name_info (data, op);
1032 info->name = op;
1033 info->has_nonlin_use |= nonlinear_use;
1034 if (!info->inv_id)
1035 info->inv_id = ++data->max_inv_id;
1036 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1039 /* Checks whether the use OP is interesting and if so, records it
1040 as TYPE. */
1042 static struct iv_use *
1043 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1044 enum use_type type)
1046 struct iv *iv;
1047 struct iv *civ;
1048 tree stmt;
1049 struct iv_use *use;
1051 if (TREE_CODE (op) != SSA_NAME)
1052 return NULL;
1054 iv = get_iv (data, op);
1055 if (!iv)
1056 return NULL;
1058 if (iv->have_use_for)
1060 use = iv_use (data, iv->use_id);
1062 gcc_assert (use->type == USE_NONLINEAR_EXPR
1063 || use->type == USE_OUTER);
1065 if (type == USE_NONLINEAR_EXPR)
1066 use->type = USE_NONLINEAR_EXPR;
1067 return use;
1070 if (zero_p (iv->step))
1072 record_invariant (data, op, true);
1073 return NULL;
1075 iv->have_use_for = true;
1077 civ = xmalloc (sizeof (struct iv));
1078 *civ = *iv;
1080 stmt = SSA_NAME_DEF_STMT (op);
1081 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1082 || TREE_CODE (stmt) == MODIFY_EXPR);
1084 use = record_use (data, NULL, civ, stmt, type);
1085 iv->use_id = use->id;
1087 return use;
1090 /* Checks whether the use OP is interesting and if so, records it. */
1092 static struct iv_use *
1093 find_interesting_uses_op (struct ivopts_data *data, tree op)
1095 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1098 /* Records a definition of induction variable OP that is used outside of the
1099 loop. */
1101 static struct iv_use *
1102 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1104 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1107 /* Checks whether the condition *COND_P in STMT is interesting
1108 and if so, records it. */
1110 static void
1111 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1113 tree *op0_p;
1114 tree *op1_p;
1115 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1116 struct iv const_iv;
1117 tree zero = integer_zero_node;
1119 const_iv.step = NULL_TREE;
1121 if (integer_zerop (*cond_p)
1122 || integer_nonzerop (*cond_p))
1123 return;
1125 if (TREE_CODE (*cond_p) == SSA_NAME)
1127 op0_p = cond_p;
1128 op1_p = &zero;
1130 else
1132 op0_p = &TREE_OPERAND (*cond_p, 0);
1133 op1_p = &TREE_OPERAND (*cond_p, 1);
1136 if (TREE_CODE (*op0_p) == SSA_NAME)
1137 iv0 = get_iv (data, *op0_p);
1138 else
1139 iv0 = &const_iv;
1141 if (TREE_CODE (*op1_p) == SSA_NAME)
1142 iv1 = get_iv (data, *op1_p);
1143 else
1144 iv1 = &const_iv;
1146 if (/* When comparing with non-invariant value, we may not do any senseful
1147 induction variable elimination. */
1148 (!iv0 || !iv1)
1149 /* Eliminating condition based on two ivs would be nontrivial.
1150 ??? TODO -- it is not really important to handle this case. */
1151 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1153 find_interesting_uses_op (data, *op0_p);
1154 find_interesting_uses_op (data, *op1_p);
1155 return;
1158 if (zero_p (iv0->step) && zero_p (iv1->step))
1160 /* If both are invariants, this is a work for unswitching. */
1161 return;
1164 civ = xmalloc (sizeof (struct iv));
1165 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1166 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1169 /* Returns true if expression EXPR is obviously invariant in LOOP,
1170 i.e. if all its operands are defined outside of the LOOP. */
1172 static bool
1173 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1175 basic_block def_bb;
1176 unsigned i, len;
1178 if (is_gimple_min_invariant (expr))
1179 return true;
1181 if (TREE_CODE (expr) == SSA_NAME)
1183 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1184 if (def_bb
1185 && flow_bb_inside_loop_p (loop, def_bb))
1186 return false;
1188 return true;
1191 if (!EXPR_P (expr))
1192 return false;
1194 len = first_rtl_op (TREE_CODE (expr));
1195 for (i = 0; i < len; i++)
1196 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1197 return false;
1199 return true;
1202 /* Cumulates the steps of indices into DATA and replaces their values with the
1203 initial ones. Returns false when the value of the index cannot be determined.
1204 Callback for for_each_index. */
1206 struct ifs_ivopts_data
1208 struct ivopts_data *ivopts_data;
1209 tree stmt;
1210 tree *step_p;
1213 static bool
1214 idx_find_step (tree base, tree *idx, void *data)
1216 struct ifs_ivopts_data *dta = data;
1217 struct iv *iv;
1218 tree step, type, iv_type, iv_step, lbound, off;
1219 struct loop *loop = dta->ivopts_data->current_loop;
1221 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1222 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1223 return false;
1225 /* If base is a component ref, require that the offset of the reference
1226 is invariant. */
1227 if (TREE_CODE (base) == COMPONENT_REF)
1229 off = component_ref_field_offset (base);
1230 return expr_invariant_in_loop_p (loop, off);
1233 /* If base is array, first check whether we will be able to move the
1234 reference out of the loop (in order to take its address in strength
1235 reduction). In order for this to work we need both lower bound
1236 and step to be loop invariants. */
1237 if (TREE_CODE (base) == ARRAY_REF)
1239 step = array_ref_element_size (base);
1240 lbound = array_ref_low_bound (base);
1242 if (!expr_invariant_in_loop_p (loop, step)
1243 || !expr_invariant_in_loop_p (loop, lbound))
1244 return false;
1247 if (TREE_CODE (*idx) != SSA_NAME)
1248 return true;
1250 iv = get_iv (dta->ivopts_data, *idx);
1251 if (!iv)
1252 return false;
1254 *idx = iv->base;
1256 if (!iv->step)
1257 return true;
1259 iv_type = TREE_TYPE (iv->base);
1260 type = build_pointer_type (TREE_TYPE (base));
1261 if (TREE_CODE (base) == ARRAY_REF)
1263 step = array_ref_element_size (base);
1265 /* We only handle addresses whose step is an integer constant. */
1266 if (TREE_CODE (step) != INTEGER_CST)
1267 return false;
1269 else
1270 /* The step for pointer arithmetics already is 1 byte. */
1271 step = build_int_cst (type, 1);
1273 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1274 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1275 type, iv->base, iv->step, dta->stmt);
1276 else
1277 iv_step = fold_convert (iv_type, iv->step);
1279 if (!iv_step)
1281 /* The index might wrap. */
1282 return false;
1285 step = EXEC_BINARY (MULT_EXPR, type, step, iv_step);
1287 if (!*dta->step_p)
1288 *dta->step_p = step;
1289 else
1290 *dta->step_p = EXEC_BINARY (PLUS_EXPR, type, *dta->step_p, step);
1292 return true;
1295 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1296 object is passed to it in DATA. */
1298 static bool
1299 idx_record_use (tree base, tree *idx,
1300 void *data)
1302 find_interesting_uses_op (data, *idx);
1303 if (TREE_CODE (base) == ARRAY_REF)
1305 find_interesting_uses_op (data, array_ref_element_size (base));
1306 find_interesting_uses_op (data, array_ref_low_bound (base));
1308 return true;
1311 /* Finds addresses in *OP_P inside STMT. */
1313 static void
1314 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1316 tree base = unshare_expr (*op_p), step = NULL;
1317 struct iv *civ;
1318 struct ifs_ivopts_data ifs_ivopts_data;
1320 /* Ignore bitfields for now. Not really something terribly complicated
1321 to handle. TODO. */
1322 if (TREE_CODE (base) == COMPONENT_REF
1323 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1324 goto fail;
1326 ifs_ivopts_data.ivopts_data = data;
1327 ifs_ivopts_data.stmt = stmt;
1328 ifs_ivopts_data.step_p = &step;
1329 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1330 || zero_p (step))
1331 goto fail;
1333 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1334 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1336 if (TREE_CODE (base) == INDIRECT_REF)
1337 base = TREE_OPERAND (base, 0);
1338 else
1339 base = build_addr (base);
1341 civ = alloc_iv (base, step);
1342 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1343 return;
1345 fail:
1346 for_each_index (op_p, idx_record_use, data);
1349 /* Finds and records invariants used in STMT. */
1351 static void
1352 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1354 use_optype uses = NULL;
1355 unsigned i, n;
1356 tree op;
1358 if (TREE_CODE (stmt) == PHI_NODE)
1359 n = PHI_NUM_ARGS (stmt);
1360 else
1362 get_stmt_operands (stmt);
1363 uses = STMT_USE_OPS (stmt);
1364 n = NUM_USES (uses);
1367 for (i = 0; i < n; i++)
1369 if (TREE_CODE (stmt) == PHI_NODE)
1370 op = PHI_ARG_DEF (stmt, i);
1371 else
1372 op = USE_OP (uses, i);
1374 record_invariant (data, op, false);
1378 /* Finds interesting uses of induction variables in the statement STMT. */
1380 static void
1381 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1383 struct iv *iv;
1384 tree op, lhs, rhs;
1385 use_optype uses = NULL;
1386 unsigned i, n;
1388 find_invariants_stmt (data, stmt);
1390 if (TREE_CODE (stmt) == COND_EXPR)
1392 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1393 return;
1396 if (TREE_CODE (stmt) == MODIFY_EXPR)
1398 lhs = TREE_OPERAND (stmt, 0);
1399 rhs = TREE_OPERAND (stmt, 1);
1401 if (TREE_CODE (lhs) == SSA_NAME)
1403 /* If the statement defines an induction variable, the uses are not
1404 interesting by themselves. */
1406 iv = get_iv (data, lhs);
1408 if (iv && !zero_p (iv->step))
1409 return;
1412 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1414 case tcc_comparison:
1415 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1416 return;
1418 case tcc_reference:
1419 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1420 if (REFERENCE_CLASS_P (lhs))
1421 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1422 return;
1424 default: ;
1427 if (REFERENCE_CLASS_P (lhs)
1428 && is_gimple_val (rhs))
1430 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1431 find_interesting_uses_op (data, rhs);
1432 return;
1435 /* TODO -- we should also handle address uses of type
1437 memory = call (whatever);
1441 call (memory). */
1444 if (TREE_CODE (stmt) == PHI_NODE
1445 && bb_for_stmt (stmt) == data->current_loop->header)
1447 lhs = PHI_RESULT (stmt);
1448 iv = get_iv (data, lhs);
1450 if (iv && !zero_p (iv->step))
1451 return;
1454 if (TREE_CODE (stmt) == PHI_NODE)
1455 n = PHI_NUM_ARGS (stmt);
1456 else
1458 uses = STMT_USE_OPS (stmt);
1459 n = NUM_USES (uses);
1462 for (i = 0; i < n; i++)
1464 if (TREE_CODE (stmt) == PHI_NODE)
1465 op = PHI_ARG_DEF (stmt, i);
1466 else
1467 op = USE_OP (uses, i);
1469 if (TREE_CODE (op) != SSA_NAME)
1470 continue;
1472 iv = get_iv (data, op);
1473 if (!iv)
1474 continue;
1476 find_interesting_uses_op (data, op);
1480 /* Finds interesting uses of induction variables outside of loops
1481 on loop exit edge EXIT. */
1483 static void
1484 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1486 tree phi, def;
1488 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
1490 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1491 find_interesting_uses_outer (data, def);
1495 /* Finds uses of the induction variables that are interesting. */
1497 static void
1498 find_interesting_uses (struct ivopts_data *data)
1500 basic_block bb;
1501 block_stmt_iterator bsi;
1502 tree phi;
1503 basic_block *body = get_loop_body (data->current_loop);
1504 unsigned i;
1505 struct version_info *info;
1506 edge e;
1508 if (dump_file && (dump_flags & TDF_DETAILS))
1509 fprintf (dump_file, "Uses:\n\n");
1511 for (i = 0; i < data->current_loop->num_nodes; i++)
1513 edge_iterator ei;
1514 bb = body[i];
1516 FOR_EACH_EDGE (e, ei, bb->succs)
1517 if (e->dest != EXIT_BLOCK_PTR
1518 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1519 find_interesting_uses_outside (data, e);
1521 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
1522 find_interesting_uses_stmt (data, phi);
1523 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1524 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1527 if (dump_file && (dump_flags & TDF_DETAILS))
1529 bitmap_iterator bi;
1531 fprintf (dump_file, "\n");
1533 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1535 info = ver_info (data, i);
1536 if (info->inv_id)
1538 fprintf (dump_file, " ");
1539 print_generic_expr (dump_file, info->name, TDF_SLIM);
1540 fprintf (dump_file, " is invariant (%d)%s\n",
1541 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1545 fprintf (dump_file, "\n");
1548 free (body);
1551 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1552 position to POS. If USE is not NULL, the candidate is set as related to
1553 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1554 replacement of the final value of the iv by a direct computation. */
1556 static struct iv_cand *
1557 add_candidate_1 (struct ivopts_data *data,
1558 tree base, tree step, bool important, enum iv_position pos,
1559 struct iv_use *use, tree incremented_at)
1561 unsigned i;
1562 struct iv_cand *cand = NULL;
1563 tree type;
1565 if (base)
1567 type = TREE_TYPE (base);
1568 if (!TYPE_UNSIGNED (type))
1570 type = unsigned_type_for (type);
1571 base = fold_convert (type, base);
1572 if (step)
1573 step = fold_convert (type, step);
1577 for (i = 0; i < n_iv_cands (data); i++)
1579 cand = iv_cand (data, i);
1581 if (cand->pos != pos)
1582 continue;
1584 if (cand->incremented_at != incremented_at)
1585 continue;
1587 if (!cand->iv)
1589 if (!base && !step)
1590 break;
1592 continue;
1595 if (!base && !step)
1596 continue;
1598 if (!operand_equal_p (base, cand->iv->base, 0))
1599 continue;
1601 if (zero_p (cand->iv->step))
1603 if (zero_p (step))
1604 break;
1606 else
1608 if (step && operand_equal_p (step, cand->iv->step, 0))
1609 break;
1613 if (i == n_iv_cands (data))
1615 cand = xcalloc (1, sizeof (struct iv_cand));
1616 cand->id = i;
1618 if (!base && !step)
1619 cand->iv = NULL;
1620 else
1621 cand->iv = alloc_iv (base, step);
1623 cand->pos = pos;
1624 if (pos != IP_ORIGINAL && cand->iv)
1626 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1627 cand->var_after = cand->var_before;
1629 cand->important = important;
1630 cand->incremented_at = incremented_at;
1631 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1633 if (dump_file && (dump_flags & TDF_DETAILS))
1634 dump_cand (dump_file, cand);
1637 if (important && !cand->important)
1639 cand->important = true;
1640 if (dump_file && (dump_flags & TDF_DETAILS))
1641 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1644 if (use)
1646 bitmap_set_bit (use->related_cands, i);
1647 if (dump_file && (dump_flags & TDF_DETAILS))
1648 fprintf (dump_file, "Candidate %d is related to use %d\n",
1649 cand->id, use->id);
1652 return cand;
1655 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1656 position to POS. If USE is not NULL, the candidate is set as related to
1657 it. The candidate computation is scheduled on all available positions. */
1659 static void
1660 add_candidate (struct ivopts_data *data,
1661 tree base, tree step, bool important, struct iv_use *use)
1663 if (ip_normal_pos (data->current_loop))
1664 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
1665 if (ip_end_pos (data->current_loop))
1666 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
1669 /* Adds standard iv candidates. */
1671 static void
1672 add_standard_iv_candidates (struct ivopts_data *data)
1674 /* Add 0 + 1 * iteration candidate. */
1675 add_candidate (data,
1676 build_int_cst (unsigned_intSI_type_node, 0),
1677 build_int_cst (unsigned_intSI_type_node, 1),
1678 true, NULL);
1680 /* The same for a long type if it is still fast enough. */
1681 if (BITS_PER_WORD > 32)
1682 add_candidate (data,
1683 build_int_cst (unsigned_intDI_type_node, 0),
1684 build_int_cst (unsigned_intDI_type_node, 1),
1685 true, NULL);
1689 /* Adds candidates bases on the old induction variable IV. */
1691 static void
1692 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
1694 tree phi, def;
1695 struct iv_cand *cand;
1697 add_candidate (data, iv->base, iv->step, true, NULL);
1699 /* The same, but with initial value zero. */
1700 add_candidate (data,
1701 build_int_cst (TREE_TYPE (iv->base), 0),
1702 iv->step, true, NULL);
1704 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
1705 if (TREE_CODE (phi) == PHI_NODE)
1707 /* Additionally record the possibility of leaving the original iv
1708 untouched. */
1709 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
1710 cand = add_candidate_1 (data,
1711 iv->base, iv->step, true, IP_ORIGINAL, NULL,
1712 SSA_NAME_DEF_STMT (def));
1713 cand->var_before = iv->ssa_name;
1714 cand->var_after = def;
1718 /* Adds candidates based on the old induction variables. */
1720 static void
1721 add_old_ivs_candidates (struct ivopts_data *data)
1723 unsigned i;
1724 struct iv *iv;
1725 bitmap_iterator bi;
1727 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1729 iv = ver_info (data, i)->iv;
1730 if (iv && iv->biv_p && !zero_p (iv->step))
1731 add_old_iv_candidates (data, iv);
1735 /* Adds candidates based on the value of the induction variable IV and USE. */
1737 static void
1738 add_iv_value_candidates (struct ivopts_data *data,
1739 struct iv *iv, struct iv_use *use)
1741 add_candidate (data, iv->base, iv->step, false, use);
1743 /* The same, but with initial value zero. */
1744 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
1745 iv->step, false, use);
1748 /* Adds candidates based on the address IV and USE. */
1750 static void
1751 add_address_candidates (struct ivopts_data *data,
1752 struct iv *iv, struct iv_use *use)
1754 tree base, abase, tmp, *act;
1756 /* First, the trivial choices. */
1757 add_iv_value_candidates (data, iv, use);
1759 /* Second, try removing the COMPONENT_REFs. */
1760 if (TREE_CODE (iv->base) == ADDR_EXPR)
1762 base = TREE_OPERAND (iv->base, 0);
1763 while (TREE_CODE (base) == COMPONENT_REF
1764 || (TREE_CODE (base) == ARRAY_REF
1765 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
1766 base = TREE_OPERAND (base, 0);
1768 if (base != TREE_OPERAND (iv->base, 0))
1770 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1771 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1773 if (TREE_CODE (base) == INDIRECT_REF)
1774 base = TREE_OPERAND (base, 0);
1775 else
1776 base = build_addr (base);
1777 add_candidate (data, base, iv->step, false, use);
1781 /* Third, try removing the constant offset. */
1782 abase = iv->base;
1783 while (TREE_CODE (abase) == PLUS_EXPR
1784 && TREE_CODE (TREE_OPERAND (abase, 1)) != INTEGER_CST)
1785 abase = TREE_OPERAND (abase, 0);
1786 /* We found the offset, so make the copy of the non-shared part and
1787 remove it. */
1788 if (TREE_CODE (abase) == PLUS_EXPR)
1790 tmp = iv->base;
1791 act = &base;
1793 for (tmp = iv->base; tmp != abase; tmp = TREE_OPERAND (tmp, 0))
1795 *act = build2 (PLUS_EXPR, TREE_TYPE (tmp),
1796 NULL_TREE, TREE_OPERAND (tmp, 1));
1797 act = &TREE_OPERAND (*act, 0);
1799 *act = TREE_OPERAND (tmp, 0);
1801 add_candidate (data, base, iv->step, false, use);
1805 /* Possibly adds pseudocandidate for replacing the final value of USE by
1806 a direct computation. */
1808 static void
1809 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
1811 struct tree_niter_desc *niter;
1812 struct loop *loop = data->current_loop;
1814 /* We must know where we exit the loop and how many times does it roll. */
1815 if (!single_dom_exit (loop))
1816 return;
1818 niter = &loop_data (loop)->niter;
1819 if (!niter->niter
1820 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
1821 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
1822 return;
1824 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
1827 /* Adds candidates based on the uses. */
1829 static void
1830 add_derived_ivs_candidates (struct ivopts_data *data)
1832 unsigned i;
1834 for (i = 0; i < n_iv_uses (data); i++)
1836 struct iv_use *use = iv_use (data, i);
1838 if (!use)
1839 continue;
1841 switch (use->type)
1843 case USE_NONLINEAR_EXPR:
1844 case USE_COMPARE:
1845 /* Just add the ivs based on the value of the iv used here. */
1846 add_iv_value_candidates (data, use->iv, use);
1847 break;
1849 case USE_OUTER:
1850 add_iv_value_candidates (data, use->iv, use);
1852 /* Additionally, add the pseudocandidate for the possibility to
1853 replace the final value by a direct computation. */
1854 add_iv_outer_candidates (data, use);
1855 break;
1857 case USE_ADDRESS:
1858 add_address_candidates (data, use->iv, use);
1859 break;
1861 default:
1862 gcc_unreachable ();
1867 /* Finds the candidates for the induction variables. */
1869 static void
1870 find_iv_candidates (struct ivopts_data *data)
1872 /* Add commonly used ivs. */
1873 add_standard_iv_candidates (data);
1875 /* Add old induction variables. */
1876 add_old_ivs_candidates (data);
1878 /* Add induction variables derived from uses. */
1879 add_derived_ivs_candidates (data);
1882 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
1883 If consider_all_candidates is true, we use a two-dimensional array, otherwise
1884 we allocate a simple list to every use. */
1886 static void
1887 alloc_use_cost_map (struct ivopts_data *data)
1889 unsigned i, n_imp = 0, size, j;
1891 if (!data->consider_all_candidates)
1893 for (i = 0; i < n_iv_cands (data); i++)
1895 struct iv_cand *cand = iv_cand (data, i);
1896 if (cand->important)
1897 n_imp++;
1901 for (i = 0; i < n_iv_uses (data); i++)
1903 struct iv_use *use = iv_use (data, i);
1904 bitmap_iterator bi;
1906 if (data->consider_all_candidates)
1908 size = n_iv_cands (data);
1909 use->n_map_members = size;
1911 else
1913 size = n_imp;
1914 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
1916 size++;
1918 use->n_map_members = 0;
1921 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
1925 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
1926 on invariants DEPENDS_ON. */
1928 static void
1929 set_use_iv_cost (struct ivopts_data *data,
1930 struct iv_use *use, struct iv_cand *cand, unsigned cost,
1931 bitmap depends_on)
1933 if (cost == INFTY
1934 && depends_on)
1936 BITMAP_XFREE (depends_on);
1937 depends_on = NULL;
1940 if (data->consider_all_candidates)
1942 use->cost_map[cand->id].cand = cand;
1943 use->cost_map[cand->id].cost = cost;
1944 use->cost_map[cand->id].depends_on = depends_on;
1945 return;
1948 if (cost == INFTY)
1949 return;
1951 use->cost_map[use->n_map_members].cand = cand;
1952 use->cost_map[use->n_map_members].cost = cost;
1953 use->cost_map[use->n_map_members].depends_on = depends_on;
1954 use->n_map_members++;
1957 /* Gets cost of (USE, CANDIDATE) pair. Stores the bitmap of dependencies to
1958 DEPENDS_ON. */
1960 static unsigned
1961 get_use_iv_cost (struct ivopts_data *data,
1962 struct iv_use *use, struct iv_cand *cand, bitmap *depends_on)
1964 unsigned i;
1966 if (!cand)
1967 return INFTY;
1969 if (data->consider_all_candidates)
1970 i = cand->id;
1971 else
1973 for (i = 0; i < use->n_map_members; i++)
1974 if (use->cost_map[i].cand == cand)
1975 break;
1977 if (i == use->n_map_members)
1978 return INFTY;
1981 if (depends_on)
1982 *depends_on = use->cost_map[i].depends_on;
1983 return use->cost_map[i].cost;
1986 /* Returns estimate on cost of computing SEQ. */
1988 static unsigned
1989 seq_cost (rtx seq)
1991 unsigned cost = 0;
1992 rtx set;
1994 for (; seq; seq = NEXT_INSN (seq))
1996 set = single_set (seq);
1997 if (set)
1998 cost += rtx_cost (set, SET);
1999 else
2000 cost++;
2003 return cost;
2006 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2007 static rtx
2008 produce_memory_decl_rtl (tree obj, int *regno)
2010 rtx x;
2011 if (!obj)
2012 abort ();
2013 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2015 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2016 x = gen_rtx_SYMBOL_REF (Pmode, name);
2018 else
2019 x = gen_raw_REG (Pmode, (*regno)++);
2021 return gen_rtx_MEM (DECL_MODE (obj), x);
2024 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2025 walk_tree. DATA contains the actual fake register number. */
2027 static tree
2028 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2030 tree obj = NULL_TREE;
2031 rtx x = NULL_RTX;
2032 int *regno = data;
2034 switch (TREE_CODE (*expr_p))
2036 case ADDR_EXPR:
2037 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2038 (handled_component_p (*expr_p)
2039 || TREE_CODE (*expr_p) == REALPART_EXPR
2040 || TREE_CODE (*expr_p) == IMAGPART_EXPR);
2041 expr_p = &TREE_OPERAND (*expr_p, 0));
2042 obj = *expr_p;
2043 if (DECL_P (obj))
2044 x = produce_memory_decl_rtl (obj, regno);
2045 break;
2047 case SSA_NAME:
2048 *ws = 0;
2049 obj = SSA_NAME_VAR (*expr_p);
2050 if (!DECL_RTL_SET_P (obj))
2051 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2052 break;
2054 case VAR_DECL:
2055 case PARM_DECL:
2056 case RESULT_DECL:
2057 *ws = 0;
2058 obj = *expr_p;
2060 if (DECL_RTL_SET_P (obj))
2061 break;
2063 if (DECL_MODE (obj) == BLKmode)
2064 x = produce_memory_decl_rtl (obj, regno);
2065 else
2066 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2068 break;
2070 default:
2071 break;
2074 if (x)
2076 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
2077 SET_DECL_RTL (obj, x);
2080 return NULL_TREE;
2083 /* Determines cost of the computation of EXPR. */
2085 static unsigned
2086 computation_cost (tree expr)
2088 rtx seq, rslt;
2089 tree type = TREE_TYPE (expr);
2090 unsigned cost;
2091 int regno = 0;
2093 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2094 start_sequence ();
2095 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2096 seq = get_insns ();
2097 end_sequence ();
2099 cost = seq_cost (seq);
2100 if (GET_CODE (rslt) == MEM)
2101 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2103 return cost;
2106 /* Returns variable containing the value of candidate CAND at statement AT. */
2108 static tree
2109 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2111 if (stmt_after_increment (loop, cand, stmt))
2112 return cand->var_after;
2113 else
2114 return cand->var_before;
2117 /* Determines the expression by that USE is expressed from induction variable
2118 CAND at statement AT in LOOP. */
2120 static tree
2121 get_computation_at (struct loop *loop,
2122 struct iv_use *use, struct iv_cand *cand, tree at)
2124 tree ubase = use->iv->base;
2125 tree ustep = use->iv->step;
2126 tree cbase = cand->iv->base;
2127 tree cstep = cand->iv->step;
2128 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2129 tree uutype;
2130 tree expr, delta;
2131 tree ratio;
2132 unsigned HOST_WIDE_INT ustepi, cstepi;
2133 HOST_WIDE_INT ratioi;
2135 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2137 /* We do not have a precision to express the values of use. */
2138 return NULL_TREE;
2141 expr = var_at_stmt (loop, cand, at);
2143 if (TREE_TYPE (expr) != ctype)
2145 /* This may happen with the original ivs. */
2146 expr = fold_convert (ctype, expr);
2149 if (TYPE_UNSIGNED (utype))
2150 uutype = utype;
2151 else
2153 uutype = unsigned_type_for (utype);
2154 ubase = fold_convert (uutype, ubase);
2155 ustep = fold_convert (uutype, ustep);
2158 if (uutype != ctype)
2160 expr = fold_convert (uutype, expr);
2161 cbase = fold_convert (uutype, cbase);
2162 cstep = fold_convert (uutype, cstep);
2165 if (!cst_and_fits_in_hwi (cstep)
2166 || !cst_and_fits_in_hwi (ustep))
2167 return NULL_TREE;
2169 ustepi = int_cst_value (ustep);
2170 cstepi = int_cst_value (cstep);
2172 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2174 /* TODO maybe consider case when ustep divides cstep and the ratio is
2175 a power of 2 (so that the division is fast to execute)? We would
2176 need to be much more careful with overflows etc. then. */
2177 return NULL_TREE;
2180 /* We may need to shift the value if we are after the increment. */
2181 if (stmt_after_increment (loop, cand, at))
2182 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2184 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2185 or |ratio| == 1, it is better to handle this like
2187 ubase - ratio * cbase + ratio * var. */
2189 if (ratioi == 1)
2191 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2192 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2194 else if (ratioi == -1)
2196 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2197 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2199 else if (TREE_CODE (cbase) == INTEGER_CST)
2201 ratio = build_int_cst_type (uutype, ratioi);
2202 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2203 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2204 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2205 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2207 else
2209 expr = fold (build2 (MINUS_EXPR, uutype, expr, cbase));
2210 ratio = build_int_cst_type (uutype, ratioi);
2211 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2212 expr = fold (build2 (PLUS_EXPR, uutype, ubase, expr));
2215 return fold_convert (utype, expr);
2218 /* Determines the expression by that USE is expressed from induction variable
2219 CAND in LOOP. */
2221 static tree
2222 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2224 return get_computation_at (loop, use, cand, use->stmt);
2227 /* Strips constant offsets from EXPR and adds them to OFFSET. */
2229 static void
2230 strip_offset (tree *expr, unsigned HOST_WIDE_INT *offset)
2232 tree op0, op1;
2233 enum tree_code code;
2235 while (1)
2237 if (cst_and_fits_in_hwi (*expr))
2239 *offset += int_cst_value (*expr);
2240 *expr = integer_zero_node;
2241 return;
2244 code = TREE_CODE (*expr);
2246 if (code != PLUS_EXPR && code != MINUS_EXPR)
2247 return;
2249 op0 = TREE_OPERAND (*expr, 0);
2250 op1 = TREE_OPERAND (*expr, 1);
2252 if (cst_and_fits_in_hwi (op1))
2254 if (code == PLUS_EXPR)
2255 *offset += int_cst_value (op1);
2256 else
2257 *offset -= int_cst_value (op1);
2259 *expr = op0;
2260 continue;
2263 if (code != PLUS_EXPR)
2264 return;
2266 if (!cst_and_fits_in_hwi (op0))
2267 return;
2269 *offset += int_cst_value (op0);
2270 *expr = op1;
2274 /* Returns cost of addition in MODE. */
2276 static unsigned
2277 add_cost (enum machine_mode mode)
2279 static unsigned costs[NUM_MACHINE_MODES];
2280 rtx seq;
2281 unsigned cost;
2283 if (costs[mode])
2284 return costs[mode];
2286 start_sequence ();
2287 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2288 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2289 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2290 NULL_RTX);
2291 seq = get_insns ();
2292 end_sequence ();
2294 cost = seq_cost (seq);
2295 if (!cost)
2296 cost = 1;
2298 costs[mode] = cost;
2300 if (dump_file && (dump_flags & TDF_DETAILS))
2301 fprintf (dump_file, "Addition in %s costs %d\n",
2302 GET_MODE_NAME (mode), cost);
2303 return cost;
2306 /* Entry in a hashtable of already known costs for multiplication. */
2307 struct mbc_entry
2309 HOST_WIDE_INT cst; /* The constant to multiply by. */
2310 enum machine_mode mode; /* In mode. */
2311 unsigned cost; /* The cost. */
2314 /* Counts hash value for the ENTRY. */
2316 static hashval_t
2317 mbc_entry_hash (const void *entry)
2319 const struct mbc_entry *e = entry;
2321 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2324 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2326 static int
2327 mbc_entry_eq (const void *entry1, const void *entry2)
2329 const struct mbc_entry *e1 = entry1;
2330 const struct mbc_entry *e2 = entry2;
2332 return (e1->mode == e2->mode
2333 && e1->cst == e2->cst);
2336 /* Returns cost of multiplication by constant CST in MODE. */
2338 static unsigned
2339 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2341 static htab_t costs;
2342 struct mbc_entry **cached, act;
2343 rtx seq;
2344 unsigned cost;
2346 if (!costs)
2347 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2349 act.mode = mode;
2350 act.cst = cst;
2351 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2352 if (*cached)
2353 return (*cached)->cost;
2355 *cached = xmalloc (sizeof (struct mbc_entry));
2356 (*cached)->mode = mode;
2357 (*cached)->cst = cst;
2359 start_sequence ();
2360 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2361 NULL_RTX, 0);
2362 seq = get_insns ();
2363 end_sequence ();
2365 cost = seq_cost (seq);
2367 if (dump_file && (dump_flags & TDF_DETAILS))
2368 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2369 (int) cst, GET_MODE_NAME (mode), cost);
2371 (*cached)->cost = cost;
2373 return cost;
2376 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2377 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2378 variable is omitted. The created memory accesses MODE.
2380 TODO -- there must be some better way. This all is quite crude. */
2382 static unsigned
2383 get_address_cost (bool symbol_present, bool var_present,
2384 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2386 #define MAX_RATIO 128
2387 static sbitmap valid_mult;
2388 static HOST_WIDE_INT rat, off;
2389 static HOST_WIDE_INT min_offset, max_offset;
2390 static unsigned costs[2][2][2][2];
2391 unsigned cost, acost;
2392 rtx seq, addr, base;
2393 bool offset_p, ratio_p;
2394 rtx reg1;
2395 HOST_WIDE_INT s_offset;
2396 unsigned HOST_WIDE_INT mask;
2397 unsigned bits;
2399 if (!valid_mult)
2401 HOST_WIDE_INT i;
2403 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2405 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2406 for (i = 1; i <= 1 << 20; i <<= 1)
2408 XEXP (addr, 1) = GEN_INT (i);
2409 if (!memory_address_p (Pmode, addr))
2410 break;
2412 max_offset = i >> 1;
2413 off = max_offset;
2415 for (i = 1; i <= 1 << 20; i <<= 1)
2417 XEXP (addr, 1) = GEN_INT (-i);
2418 if (!memory_address_p (Pmode, addr))
2419 break;
2421 min_offset = -(i >> 1);
2423 if (dump_file && (dump_flags & TDF_DETAILS))
2425 fprintf (dump_file, "get_address_cost:\n");
2426 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2427 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2430 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2431 sbitmap_zero (valid_mult);
2432 rat = 1;
2433 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2434 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2436 XEXP (addr, 1) = GEN_INT (i);
2437 if (memory_address_p (Pmode, addr))
2439 SET_BIT (valid_mult, i + MAX_RATIO);
2440 rat = i;
2444 if (dump_file && (dump_flags & TDF_DETAILS))
2446 fprintf (dump_file, " allowed multipliers:");
2447 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2448 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2449 fprintf (dump_file, " %d", (int) i);
2450 fprintf (dump_file, "\n");
2451 fprintf (dump_file, "\n");
2455 bits = GET_MODE_BITSIZE (Pmode);
2456 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2457 offset &= mask;
2458 if ((offset >> (bits - 1) & 1))
2459 offset |= ~mask;
2460 s_offset = offset;
2462 cost = 0;
2463 offset_p = (min_offset <= s_offset && s_offset <= max_offset);
2464 ratio_p = (ratio != 1
2465 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2466 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2468 if (ratio != 1 && !ratio_p)
2469 cost += multiply_by_cost (ratio, Pmode);
2471 if (s_offset && !offset_p && !symbol_present)
2473 cost += add_cost (Pmode);
2474 var_present = true;
2477 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2478 if (!acost)
2480 acost = 0;
2482 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2483 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2484 if (ratio_p)
2485 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2487 if (symbol_present)
2489 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2490 if (offset_p)
2491 base = gen_rtx_fmt_e (CONST, Pmode,
2492 gen_rtx_fmt_ee (PLUS, Pmode,
2493 base,
2494 GEN_INT (off)));
2495 if (var_present)
2496 base = gen_rtx_fmt_ee (PLUS, Pmode, reg1, base);
2499 else if (var_present)
2501 base = reg1;
2502 if (offset_p)
2503 base = gen_rtx_fmt_ee (PLUS, Pmode, base, GEN_INT (off));
2505 else if (offset_p)
2506 base = GEN_INT (off);
2507 else
2508 base = NULL_RTX;
2510 if (base)
2511 addr = gen_rtx_fmt_ee (PLUS, Pmode, base, addr);
2513 start_sequence ();
2514 addr = memory_address (Pmode, addr);
2515 seq = get_insns ();
2516 end_sequence ();
2518 acost = seq_cost (seq);
2519 acost += address_cost (addr, Pmode);
2521 if (!acost)
2522 acost = 1;
2523 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2526 return cost + acost;
2529 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2530 the bitmap to that we should store it. */
2532 static struct ivopts_data *fd_ivopts_data;
2533 static tree
2534 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2536 bitmap *depends_on = data;
2537 struct version_info *info;
2539 if (TREE_CODE (*expr_p) != SSA_NAME)
2540 return NULL_TREE;
2541 info = name_info (fd_ivopts_data, *expr_p);
2543 if (!info->inv_id || info->has_nonlin_use)
2544 return NULL_TREE;
2546 if (!*depends_on)
2547 *depends_on = BITMAP_XMALLOC ();
2548 bitmap_set_bit (*depends_on, info->inv_id);
2550 return NULL_TREE;
2553 /* Estimates cost of forcing EXPR into variable. DEPENDS_ON is a set of the
2554 invariants the computation depends on. */
2556 static unsigned
2557 force_var_cost (struct ivopts_data *data,
2558 tree expr, bitmap *depends_on)
2560 static bool costs_initialized = false;
2561 static unsigned integer_cost;
2562 static unsigned symbol_cost;
2563 static unsigned address_cost;
2565 if (!costs_initialized)
2567 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2568 rtx x = gen_rtx_MEM (DECL_MODE (var),
2569 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2570 tree addr;
2571 tree type = build_pointer_type (integer_type_node);
2573 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2574 2000));
2576 SET_DECL_RTL (var, x);
2577 TREE_STATIC (var) = 1;
2578 addr = build1 (ADDR_EXPR, type, var);
2579 symbol_cost = computation_cost (addr) + 1;
2581 address_cost
2582 = computation_cost (build2 (PLUS_EXPR, type,
2583 addr,
2584 build_int_cst_type (type, 2000))) + 1;
2585 if (dump_file && (dump_flags & TDF_DETAILS))
2587 fprintf (dump_file, "force_var_cost:\n");
2588 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2589 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2590 fprintf (dump_file, " address %d\n", (int) address_cost);
2591 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2592 fprintf (dump_file, "\n");
2595 costs_initialized = true;
2598 if (depends_on)
2600 fd_ivopts_data = data;
2601 walk_tree (&expr, find_depends, depends_on, NULL);
2604 if (SSA_VAR_P (expr))
2605 return 0;
2607 if (TREE_INVARIANT (expr))
2609 if (TREE_CODE (expr) == INTEGER_CST)
2610 return integer_cost;
2612 if (TREE_CODE (expr) == ADDR_EXPR)
2614 tree obj = TREE_OPERAND (expr, 0);
2616 if (TREE_CODE (obj) == VAR_DECL
2617 || TREE_CODE (obj) == PARM_DECL
2618 || TREE_CODE (obj) == RESULT_DECL)
2619 return symbol_cost;
2622 return address_cost;
2625 /* Just an arbitrary value, FIXME. */
2626 return target_spill_cost;
2629 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2630 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2631 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2632 invariants the computation depends on. */
2634 static unsigned
2635 split_address_cost (struct ivopts_data *data,
2636 tree addr, bool *symbol_present, bool *var_present,
2637 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2639 tree core;
2640 HOST_WIDE_INT bitsize;
2641 HOST_WIDE_INT bitpos;
2642 tree toffset;
2643 enum machine_mode mode;
2644 int unsignedp, volatilep;
2646 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
2647 &unsignedp, &volatilep);
2649 if (toffset != 0
2650 || bitpos % BITS_PER_UNIT != 0
2651 || TREE_CODE (core) != VAR_DECL)
2653 *symbol_present = false;
2654 *var_present = true;
2655 fd_ivopts_data = data;
2656 walk_tree (&addr, find_depends, depends_on, NULL);
2657 return target_spill_cost;
2660 *offset += bitpos / BITS_PER_UNIT;
2661 if (TREE_STATIC (core)
2662 || DECL_EXTERNAL (core))
2664 *symbol_present = true;
2665 *var_present = false;
2666 return 0;
2669 *symbol_present = false;
2670 *var_present = true;
2671 return 0;
2674 /* Estimates cost of expressing difference of addresses E1 - E2 as
2675 var + symbol + offset. The value of offset is added to OFFSET,
2676 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2677 part is missing. DEPENDS_ON is a set of the invariants the computation
2678 depends on. */
2680 static unsigned
2681 ptr_difference_cost (struct ivopts_data *data,
2682 tree e1, tree e2, bool *symbol_present, bool *var_present,
2683 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2685 HOST_WIDE_INT diff = 0;
2686 unsigned cost;
2688 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
2690 if (TREE_CODE (e2) == ADDR_EXPR
2691 && ptr_difference_const (TREE_OPERAND (e1, 0),
2692 TREE_OPERAND (e2, 0), &diff))
2694 *offset += diff;
2695 *symbol_present = false;
2696 *var_present = false;
2697 return 0;
2700 if (e2 == integer_zero_node)
2701 return split_address_cost (data, TREE_OPERAND (e1, 0),
2702 symbol_present, var_present, offset, depends_on);
2704 *symbol_present = false;
2705 *var_present = true;
2707 cost = force_var_cost (data, e1, depends_on);
2708 cost += force_var_cost (data, e2, depends_on);
2709 cost += add_cost (Pmode);
2711 return cost;
2714 /* Estimates cost of expressing difference E1 - E2 as
2715 var + symbol + offset. The value of offset is added to OFFSET,
2716 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2717 part is missing. DEPENDS_ON is a set of the invariants the computation
2718 depends on. */
2720 static unsigned
2721 difference_cost (struct ivopts_data *data,
2722 tree e1, tree e2, bool *symbol_present, bool *var_present,
2723 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2725 unsigned cost;
2726 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
2728 strip_offset (&e1, offset);
2729 *offset = -*offset;
2730 strip_offset (&e2, offset);
2731 *offset = -*offset;
2733 if (TREE_CODE (e1) == ADDR_EXPR)
2734 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
2735 depends_on);
2736 *symbol_present = false;
2738 if (operand_equal_p (e1, e2, 0))
2740 *var_present = false;
2741 return 0;
2743 *var_present = true;
2744 if (zero_p (e2))
2745 return force_var_cost (data, e1, depends_on);
2747 if (zero_p (e1))
2749 cost = force_var_cost (data, e2, depends_on);
2750 cost += multiply_by_cost (-1, mode);
2752 return cost;
2755 cost = force_var_cost (data, e1, depends_on);
2756 cost += force_var_cost (data, e2, depends_on);
2757 cost += add_cost (mode);
2759 return cost;
2762 /* Determines the cost of the computation by that USE is expressed
2763 from induction variable CAND. If ADDRESS_P is true, we just need
2764 to create an address from it, otherwise we want to get it into
2765 register. A set of invariants we depend on is stored in
2766 DEPENDS_ON. AT is the statement at that the value is computed. */
2768 static unsigned
2769 get_computation_cost_at (struct ivopts_data *data,
2770 struct iv_use *use, struct iv_cand *cand,
2771 bool address_p, bitmap *depends_on, tree at)
2773 tree ubase = use->iv->base, ustep = use->iv->step;
2774 tree cbase, cstep;
2775 tree utype = TREE_TYPE (ubase), ctype;
2776 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
2777 HOST_WIDE_INT ratio, aratio;
2778 bool var_present, symbol_present;
2779 unsigned cost = 0, n_sums;
2781 *depends_on = NULL;
2783 /* Only consider real candidates. */
2784 if (!cand->iv)
2785 return INFTY;
2787 cbase = cand->iv->base;
2788 cstep = cand->iv->step;
2789 ctype = TREE_TYPE (cbase);
2791 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2793 /* We do not have a precision to express the values of use. */
2794 return INFTY;
2797 if (!cst_and_fits_in_hwi (ustep)
2798 || !cst_and_fits_in_hwi (cstep))
2799 return INFTY;
2801 if (TREE_CODE (ubase) == INTEGER_CST
2802 && !cst_and_fits_in_hwi (ubase))
2803 goto fallback;
2805 if (TREE_CODE (cbase) == INTEGER_CST
2806 && !cst_and_fits_in_hwi (cbase))
2807 goto fallback;
2809 ustepi = int_cst_value (ustep);
2810 cstepi = int_cst_value (cstep);
2812 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
2814 /* TODO -- add direct handling of this case. */
2815 goto fallback;
2818 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
2819 return INFTY;
2821 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2822 or ratio == 1, it is better to handle this like
2824 ubase - ratio * cbase + ratio * var
2826 (also holds in the case ratio == -1, TODO. */
2828 if (TREE_CODE (cbase) == INTEGER_CST)
2830 offset = - ratio * int_cst_value (cbase);
2831 cost += difference_cost (data,
2832 ubase, integer_zero_node,
2833 &symbol_present, &var_present, &offset,
2834 depends_on);
2836 else if (ratio == 1)
2838 cost += difference_cost (data,
2839 ubase, cbase,
2840 &symbol_present, &var_present, &offset,
2841 depends_on);
2843 else
2845 cost += force_var_cost (data, cbase, depends_on);
2846 cost += add_cost (TYPE_MODE (ctype));
2847 cost += difference_cost (data,
2848 ubase, integer_zero_node,
2849 &symbol_present, &var_present, &offset,
2850 depends_on);
2853 /* If we are after the increment, the value of the candidate is higher by
2854 one iteration. */
2855 if (stmt_after_increment (data->current_loop, cand, at))
2856 offset -= ratio * cstepi;
2858 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
2859 (symbol/var/const parts may be omitted). If we are looking for an address,
2860 find the cost of addressing this. */
2861 if (address_p)
2862 return get_address_cost (symbol_present, var_present, offset, ratio);
2864 /* Otherwise estimate the costs for computing the expression. */
2865 aratio = ratio > 0 ? ratio : -ratio;
2866 if (!symbol_present && !var_present && !offset)
2868 if (ratio != 1)
2869 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
2871 return cost;
2874 if (aratio != 1)
2875 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
2877 n_sums = 1;
2878 if (var_present
2879 /* Symbol + offset should be compile-time computable. */
2880 && (symbol_present || offset))
2881 n_sums++;
2883 return cost + n_sums * add_cost (TYPE_MODE (ctype));
2885 fallback:
2887 /* Just get the expression, expand it and measure the cost. */
2888 tree comp = get_computation_at (data->current_loop, use, cand, at);
2890 if (!comp)
2891 return INFTY;
2893 if (address_p)
2894 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
2896 return computation_cost (comp);
2900 /* Determines the cost of the computation by that USE is expressed
2901 from induction variable CAND. If ADDRESS_P is true, we just need
2902 to create an address from it, otherwise we want to get it into
2903 register. A set of invariants we depend on is stored in
2904 DEPENDS_ON. */
2906 static unsigned
2907 get_computation_cost (struct ivopts_data *data,
2908 struct iv_use *use, struct iv_cand *cand,
2909 bool address_p, bitmap *depends_on)
2911 return get_computation_cost_at (data,
2912 use, cand, address_p, depends_on, use->stmt);
2915 /* Determines cost of basing replacement of USE on CAND in a generic
2916 expression. */
2918 static void
2919 determine_use_iv_cost_generic (struct ivopts_data *data,
2920 struct iv_use *use, struct iv_cand *cand)
2922 bitmap depends_on;
2923 unsigned cost = get_computation_cost (data, use, cand, false, &depends_on);
2925 set_use_iv_cost (data, use, cand, cost, depends_on);
2928 /* Determines cost of basing replacement of USE on CAND in an address. */
2930 static void
2931 determine_use_iv_cost_address (struct ivopts_data *data,
2932 struct iv_use *use, struct iv_cand *cand)
2934 bitmap depends_on;
2935 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
2937 set_use_iv_cost (data, use, cand, cost, depends_on);
2940 /* Computes value of induction variable IV in iteration NITER. */
2942 static tree
2943 iv_value (struct iv *iv, tree niter)
2945 tree val;
2946 tree type = TREE_TYPE (iv->base);
2948 niter = fold_convert (type, niter);
2949 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
2951 return fold (build2 (PLUS_EXPR, type, iv->base, val));
2954 /* Computes value of candidate CAND at position AT in iteration NITER. */
2956 static tree
2957 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
2959 tree val = iv_value (cand->iv, niter);
2960 tree type = TREE_TYPE (cand->iv->base);
2962 if (stmt_after_increment (loop, cand, at))
2963 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
2965 return val;
2968 /* Check whether it is possible to express the condition in USE by comparison
2969 of candidate CAND. If so, store the comparison code to COMPARE and the
2970 value compared with to BOUND. */
2972 static bool
2973 may_eliminate_iv (struct loop *loop,
2974 struct iv_use *use, struct iv_cand *cand,
2975 enum tree_code *compare, tree *bound)
2977 edge exit;
2978 struct tree_niter_desc *niter, new_niter;
2979 tree wider_type, type, base;
2981 /* For now just very primitive -- we work just for the single exit condition,
2982 and are quite conservative about the possible overflows. TODO -- both of
2983 these can be improved. */
2984 exit = single_dom_exit (loop);
2985 if (!exit)
2986 return false;
2987 if (use->stmt != last_stmt (exit->src))
2988 return false;
2990 niter = &loop_data (loop)->niter;
2991 if (!niter->niter
2992 || !integer_nonzerop (niter->assumptions)
2993 || !integer_zerop (niter->may_be_zero))
2994 return false;
2996 if (exit->flags & EDGE_TRUE_VALUE)
2997 *compare = EQ_EXPR;
2998 else
2999 *compare = NE_EXPR;
3001 *bound = cand_value_at (loop, cand, use->stmt, niter->niter);
3003 /* Let us check there is not some problem with overflows, by checking that
3004 the number of iterations is unchanged. */
3005 base = cand->iv->base;
3006 type = TREE_TYPE (base);
3007 if (stmt_after_increment (loop, cand, use->stmt))
3008 base = fold (build2 (PLUS_EXPR, type, base, cand->iv->step));
3010 new_niter.niter = NULL_TREE;
3011 number_of_iterations_cond (TREE_TYPE (cand->iv->base), base,
3012 cand->iv->step, NE_EXPR, *bound, NULL_TREE,
3013 &new_niter);
3014 if (!new_niter.niter
3015 || !integer_nonzerop (new_niter.assumptions)
3016 || !integer_zerop (new_niter.may_be_zero))
3017 return false;
3019 wider_type = TREE_TYPE (new_niter.niter);
3020 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (TREE_TYPE (niter->niter)))
3021 wider_type = TREE_TYPE (niter->niter);
3022 if (!operand_equal_p (fold_convert (wider_type, niter->niter),
3023 fold_convert (wider_type, new_niter.niter), 0))
3024 return false;
3026 return true;
3029 /* Determines cost of basing replacement of USE on CAND in a condition. */
3031 static void
3032 determine_use_iv_cost_condition (struct ivopts_data *data,
3033 struct iv_use *use, struct iv_cand *cand)
3035 tree bound;
3036 enum tree_code compare;
3038 /* Only consider real candidates. */
3039 if (!cand->iv)
3041 set_use_iv_cost (data, use, cand, INFTY, NULL);
3042 return;
3045 if (may_eliminate_iv (data->current_loop, use, cand, &compare, &bound))
3047 bitmap depends_on = NULL;
3048 unsigned cost = force_var_cost (data, bound, &depends_on);
3050 set_use_iv_cost (data, use, cand, cost, depends_on);
3051 return;
3054 /* The induction variable elimination failed; just express the original
3055 giv. If it is compared with an invariant, note that we cannot get
3056 rid of it. */
3057 if (TREE_CODE (*use->op_p) == SSA_NAME)
3058 record_invariant (data, *use->op_p, true);
3059 else
3061 record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
3062 record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
3065 determine_use_iv_cost_generic (data, use, cand);
3068 /* Checks whether it is possible to replace the final value of USE by
3069 a direct computation. If so, the formula is stored to *VALUE. */
3071 static bool
3072 may_replace_final_value (struct loop *loop, struct iv_use *use, tree *value)
3074 edge exit;
3075 struct tree_niter_desc *niter;
3077 exit = single_dom_exit (loop);
3078 if (!exit)
3079 return false;
3081 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3082 bb_for_stmt (use->stmt)));
3084 niter = &loop_data (loop)->niter;
3085 if (!niter->niter
3086 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
3087 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
3088 return false;
3090 *value = iv_value (use->iv, niter->niter);
3092 return true;
3095 /* Determines cost of replacing final value of USE using CAND. */
3097 static void
3098 determine_use_iv_cost_outer (struct ivopts_data *data,
3099 struct iv_use *use, struct iv_cand *cand)
3101 bitmap depends_on;
3102 unsigned cost;
3103 edge exit;
3104 tree value;
3105 struct loop *loop = data->current_loop;
3107 if (!cand->iv)
3109 if (!may_replace_final_value (loop, use, &value))
3111 set_use_iv_cost (data, use, cand, INFTY, NULL);
3112 return;
3115 depends_on = NULL;
3116 cost = force_var_cost (data, value, &depends_on);
3118 cost /= AVG_LOOP_NITER (loop);
3120 set_use_iv_cost (data, use, cand, cost, depends_on);
3121 return;
3124 exit = single_dom_exit (loop);
3125 if (exit)
3127 /* If there is just a single exit, we may use value of the candidate
3128 after we take it to determine the value of use. */
3129 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3130 last_stmt (exit->src));
3131 if (cost != INFTY)
3132 cost /= AVG_LOOP_NITER (loop);
3134 else
3136 /* Otherwise we just need to compute the iv. */
3137 cost = get_computation_cost (data, use, cand, false, &depends_on);
3140 set_use_iv_cost (data, use, cand, cost, depends_on);
3143 /* Determines cost of basing replacement of USE on CAND. */
3145 static void
3146 determine_use_iv_cost (struct ivopts_data *data,
3147 struct iv_use *use, struct iv_cand *cand)
3149 switch (use->type)
3151 case USE_NONLINEAR_EXPR:
3152 determine_use_iv_cost_generic (data, use, cand);
3153 break;
3155 case USE_OUTER:
3156 determine_use_iv_cost_outer (data, use, cand);
3157 break;
3159 case USE_ADDRESS:
3160 determine_use_iv_cost_address (data, use, cand);
3161 break;
3163 case USE_COMPARE:
3164 determine_use_iv_cost_condition (data, use, cand);
3165 break;
3167 default:
3168 gcc_unreachable ();
3172 /* Determines costs of basing the use of the iv on an iv candidate. */
3174 static void
3175 determine_use_iv_costs (struct ivopts_data *data)
3177 unsigned i, j;
3178 struct iv_use *use;
3179 struct iv_cand *cand;
3181 data->consider_all_candidates = (n_iv_cands (data)
3182 <= CONSIDER_ALL_CANDIDATES_BOUND);
3184 alloc_use_cost_map (data);
3186 if (!data->consider_all_candidates)
3188 /* Add the important candidate entries. */
3189 for (j = 0; j < n_iv_cands (data); j++)
3191 cand = iv_cand (data, j);
3192 if (!cand->important)
3193 continue;
3194 for (i = 0; i < n_iv_uses (data); i++)
3196 use = iv_use (data, i);
3197 determine_use_iv_cost (data, use, cand);
3202 for (i = 0; i < n_iv_uses (data); i++)
3204 use = iv_use (data, i);
3206 if (data->consider_all_candidates)
3208 for (j = 0; j < n_iv_cands (data); j++)
3210 cand = iv_cand (data, j);
3211 determine_use_iv_cost (data, use, cand);
3214 else
3216 bitmap_iterator bi;
3218 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3220 cand = iv_cand (data, j);
3221 if (!cand->important)
3222 determine_use_iv_cost (data, use, cand);
3227 if (dump_file && (dump_flags & TDF_DETAILS))
3229 fprintf (dump_file, "Use-candidate costs:\n");
3231 for (i = 0; i < n_iv_uses (data); i++)
3233 use = iv_use (data, i);
3235 fprintf (dump_file, "Use %d:\n", i);
3236 fprintf (dump_file, " cand\tcost\tdepends on\n");
3237 for (j = 0; j < use->n_map_members; j++)
3239 if (!use->cost_map[j].cand
3240 || use->cost_map[j].cost == INFTY)
3241 continue;
3243 fprintf (dump_file, " %d\t%d\t",
3244 use->cost_map[j].cand->id,
3245 use->cost_map[j].cost);
3246 if (use->cost_map[j].depends_on)
3247 bitmap_print (dump_file,
3248 use->cost_map[j].depends_on, "","");
3249 fprintf (dump_file, "\n");
3252 fprintf (dump_file, "\n");
3254 fprintf (dump_file, "\n");
3258 /* Determines cost of the candidate CAND. */
3260 static void
3261 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3263 unsigned cost_base, cost_step;
3264 tree base, last;
3265 basic_block bb;
3267 if (!cand->iv)
3269 cand->cost = 0;
3270 return;
3273 /* There are two costs associated with the candidate -- its increment
3274 and its initialization. The second is almost negligible for any loop
3275 that rolls enough, so we take it just very little into account. */
3277 base = cand->iv->base;
3278 cost_base = force_var_cost (data, base, NULL);
3279 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3281 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3283 /* Prefer the original iv unless we may gain something by replacing it. */
3284 if (cand->pos == IP_ORIGINAL)
3285 cand->cost--;
3287 /* Prefer not to insert statements into latch unless there are some
3288 already (so that we do not create unnecessary jumps). */
3289 if (cand->pos == IP_END)
3291 bb = ip_end_pos (data->current_loop);
3292 last = last_stmt (bb);
3294 if (!last
3295 || TREE_CODE (last) == LABEL_EXPR)
3296 cand->cost++;
3300 /* Determines costs of computation of the candidates. */
3302 static void
3303 determine_iv_costs (struct ivopts_data *data)
3305 unsigned i;
3307 if (dump_file && (dump_flags & TDF_DETAILS))
3309 fprintf (dump_file, "Candidate costs:\n");
3310 fprintf (dump_file, " cand\tcost\n");
3313 for (i = 0; i < n_iv_cands (data); i++)
3315 struct iv_cand *cand = iv_cand (data, i);
3317 determine_iv_cost (data, cand);
3319 if (dump_file && (dump_flags & TDF_DETAILS))
3320 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3323 if (dump_file && (dump_flags & TDF_DETAILS))
3324 fprintf (dump_file, "\n");
3327 /* Calculates cost for having SIZE induction variables. */
3329 static unsigned
3330 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3332 return global_cost_for_size (size,
3333 loop_data (data->current_loop)->regs_used,
3334 n_iv_uses (data));
3337 /* For each size of the induction variable set determine the penalty. */
3339 static void
3340 determine_set_costs (struct ivopts_data *data)
3342 unsigned j, n;
3343 tree phi, op;
3344 struct loop *loop = data->current_loop;
3345 bitmap_iterator bi;
3347 /* We use the following model (definitely improvable, especially the
3348 cost function -- TODO):
3350 We estimate the number of registers available (using MD data), name it A.
3352 We estimate the number of registers used by the loop, name it U. This
3353 number is obtained as the number of loop phi nodes (not counting virtual
3354 registers and bivs) + the number of variables from outside of the loop.
3356 We set a reserve R (free regs that are used for temporary computations,
3357 etc.). For now the reserve is a constant 3.
3359 Let I be the number of induction variables.
3361 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3362 make a lot of ivs without a reason).
3363 -- if A - R < U + I <= A, the cost is I * PRES_COST
3364 -- if U + I > A, the cost is I * PRES_COST and
3365 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3367 if (dump_file && (dump_flags & TDF_DETAILS))
3369 fprintf (dump_file, "Global costs:\n");
3370 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3371 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3372 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3373 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3376 n = 0;
3377 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
3379 op = PHI_RESULT (phi);
3381 if (!is_gimple_reg (op))
3382 continue;
3384 if (get_iv (data, op))
3385 continue;
3387 n++;
3390 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3392 struct version_info *info = ver_info (data, j);
3394 if (info->inv_id && info->has_nonlin_use)
3395 n++;
3398 loop_data (loop)->regs_used = n;
3399 if (dump_file && (dump_flags & TDF_DETAILS))
3400 fprintf (dump_file, " regs_used %d\n", n);
3402 if (dump_file && (dump_flags & TDF_DETAILS))
3404 fprintf (dump_file, " cost for size:\n");
3405 fprintf (dump_file, " ivs\tcost\n");
3406 for (j = 0; j <= 2 * target_avail_regs; j++)
3407 fprintf (dump_file, " %d\t%d\n", j,
3408 ivopts_global_cost_for_size (data, j));
3409 fprintf (dump_file, "\n");
3413 /* Finds a best candidate for USE and stores it to CAND. The candidates are
3414 taken from the set SOL and they may depend on invariants in the set INV.
3415 The really used candidate and invariants are noted to USED_IVS and
3416 USED_INV. */
3418 static unsigned
3419 find_best_candidate (struct ivopts_data *data,
3420 struct iv_use *use, bitmap sol, bitmap inv,
3421 bitmap used_ivs, bitmap used_inv, struct iv_cand **cand)
3423 unsigned c, d;
3424 unsigned best_cost = INFTY, cost;
3425 struct iv_cand *cnd = NULL, *acnd;
3426 bitmap depends_on = NULL, asol;
3427 bitmap_iterator bi, bi1;
3429 if (data->consider_all_candidates)
3430 asol = sol;
3431 else
3433 asol = BITMAP_XMALLOC ();
3434 bitmap_a_and_b (asol, sol, use->related_cands);
3437 EXECUTE_IF_SET_IN_BITMAP (asol, 0, c, bi)
3439 acnd = iv_cand (data, c);
3440 cost = get_use_iv_cost (data, use, acnd, &depends_on);
3442 if (cost == INFTY)
3443 continue;
3444 if (cost > best_cost)
3445 continue;
3446 if (cost == best_cost)
3448 /* Prefer the cheaper iv. */
3449 if (acnd->cost >= cnd->cost)
3450 continue;
3453 if (depends_on)
3455 EXECUTE_IF_AND_COMPL_IN_BITMAP (depends_on, inv, 0, d, bi1)
3457 goto next_cand;
3459 if (used_inv)
3460 bitmap_a_or_b (used_inv, used_inv, depends_on);
3463 cnd = acnd;
3464 best_cost = cost;
3466 next_cand: ;
3469 if (cnd && used_ivs)
3470 bitmap_set_bit (used_ivs, cnd->id);
3472 if (cand)
3473 *cand = cnd;
3475 if (!data->consider_all_candidates)
3476 BITMAP_XFREE (asol);
3478 return best_cost;
3481 /* Returns cost of set of ivs SOL + invariants INV. Removes unnecessary
3482 induction variable candidates and invariants from the sets. Only
3483 uses 0 .. MAX_USE - 1 are taken into account. */
3485 static unsigned
3486 set_cost_up_to (struct ivopts_data *data, bitmap sol, bitmap inv,
3487 unsigned max_use)
3489 unsigned i;
3490 unsigned cost = 0, size = 0, acost;
3491 struct iv_use *use;
3492 struct iv_cand *cand;
3493 bitmap used_ivs = BITMAP_XMALLOC (), used_inv = BITMAP_XMALLOC ();
3494 bitmap_iterator bi;
3496 for (i = 0; i < max_use; i++)
3498 use = iv_use (data, i);
3499 acost = find_best_candidate (data, use, sol, inv,
3500 used_ivs, used_inv, NULL);
3501 if (acost == INFTY)
3503 BITMAP_XFREE (used_ivs);
3504 BITMAP_XFREE (used_inv);
3505 return INFTY;
3507 cost += acost;
3510 EXECUTE_IF_SET_IN_BITMAP (used_ivs, 0, i, bi)
3512 cand = iv_cand (data, i);
3514 /* Do not count the pseudocandidates. */
3515 if (cand->iv)
3516 size++;
3518 cost += cand->cost;
3520 EXECUTE_IF_SET_IN_BITMAP (used_inv, 0, i, bi)
3522 size++;
3524 cost += ivopts_global_cost_for_size (data, size);
3526 bitmap_copy (sol, used_ivs);
3527 bitmap_copy (inv, used_inv);
3529 BITMAP_XFREE (used_ivs);
3530 BITMAP_XFREE (used_inv);
3532 return cost;
3535 /* Computes cost of set of ivs SOL + invariants INV. Removes unnecessary
3536 induction variable candidates and invariants from the sets. */
3538 static unsigned
3539 set_cost (struct ivopts_data *data, bitmap sol, bitmap inv)
3541 return set_cost_up_to (data, sol, inv, n_iv_uses (data));
3544 /* Tries to extend the sets IVS and INV in the best possible way in order
3545 to express the USE. */
3547 static bool
3548 try_add_cand_for (struct ivopts_data *data, bitmap ivs, bitmap inv,
3549 struct iv_use *use)
3551 unsigned best_cost = set_cost_up_to (data, ivs, inv, use->id + 1), act_cost;
3552 bitmap best_ivs = BITMAP_XMALLOC ();
3553 bitmap best_inv = BITMAP_XMALLOC ();
3554 bitmap act_ivs = BITMAP_XMALLOC ();
3555 bitmap act_inv = BITMAP_XMALLOC ();
3556 unsigned i;
3557 struct cost_pair *cp;
3559 bitmap_copy (best_ivs, ivs);
3560 bitmap_copy (best_inv, inv);
3562 for (i = 0; i < use->n_map_members; i++)
3564 cp = use->cost_map + i;
3565 if (cp->cost == INFTY)
3566 continue;
3568 bitmap_copy (act_ivs, ivs);
3569 bitmap_set_bit (act_ivs, cp->cand->id);
3570 if (cp->depends_on)
3571 bitmap_a_or_b (act_inv, inv, cp->depends_on);
3572 else
3573 bitmap_copy (act_inv, inv);
3574 act_cost = set_cost_up_to (data, act_ivs, act_inv, use->id + 1);
3576 if (act_cost < best_cost)
3578 best_cost = act_cost;
3579 bitmap_copy (best_ivs, act_ivs);
3580 bitmap_copy (best_inv, act_inv);
3584 bitmap_copy (ivs, best_ivs);
3585 bitmap_copy (inv, best_inv);
3587 BITMAP_XFREE (best_ivs);
3588 BITMAP_XFREE (best_inv);
3589 BITMAP_XFREE (act_ivs);
3590 BITMAP_XFREE (act_inv);
3592 return (best_cost != INFTY);
3595 /* Finds an initial set of IVS and invariants INV. We do this by simply
3596 choosing the best candidate for each use. */
3598 static unsigned
3599 get_initial_solution (struct ivopts_data *data, bitmap ivs, bitmap inv)
3601 unsigned i;
3603 for (i = 0; i < n_iv_uses (data); i++)
3604 if (!try_add_cand_for (data, ivs, inv, iv_use (data, i)))
3605 return INFTY;
3607 return set_cost (data, ivs, inv);
3610 /* Tries to improve set of induction variables IVS and invariants INV to get
3611 it better than COST. */
3613 static bool
3614 try_improve_iv_set (struct ivopts_data *data,
3615 bitmap ivs, bitmap inv, unsigned *cost)
3617 unsigned i, acost;
3618 bitmap new_ivs = BITMAP_XMALLOC (), new_inv = BITMAP_XMALLOC ();
3619 bitmap best_new_ivs = NULL, best_new_inv = NULL;
3621 /* Try altering the set of induction variables by one. */
3622 for (i = 0; i < n_iv_cands (data); i++)
3624 bitmap_copy (new_ivs, ivs);
3625 bitmap_copy (new_inv, inv);
3627 if (bitmap_bit_p (ivs, i))
3628 bitmap_clear_bit (new_ivs, i);
3629 else
3630 bitmap_set_bit (new_ivs, i);
3632 acost = set_cost (data, new_ivs, new_inv);
3633 if (acost >= *cost)
3634 continue;
3636 if (!best_new_ivs)
3638 best_new_ivs = BITMAP_XMALLOC ();
3639 best_new_inv = BITMAP_XMALLOC ();
3642 *cost = acost;
3643 bitmap_copy (best_new_ivs, new_ivs);
3644 bitmap_copy (best_new_inv, new_inv);
3647 /* Ditto for invariants. */
3648 for (i = 1; i <= data->max_inv_id; i++)
3650 if (ver_info (data, i)->has_nonlin_use)
3651 continue;
3653 bitmap_copy (new_ivs, ivs);
3654 bitmap_copy (new_inv, inv);
3656 if (bitmap_bit_p (inv, i))
3657 bitmap_clear_bit (new_inv, i);
3658 else
3659 bitmap_set_bit (new_inv, i);
3661 acost = set_cost (data, new_ivs, new_inv);
3662 if (acost >= *cost)
3663 continue;
3665 if (!best_new_ivs)
3667 best_new_ivs = BITMAP_XMALLOC ();
3668 best_new_inv = BITMAP_XMALLOC ();
3671 *cost = acost;
3672 bitmap_copy (best_new_ivs, new_ivs);
3673 bitmap_copy (best_new_inv, new_inv);
3676 BITMAP_XFREE (new_ivs);
3677 BITMAP_XFREE (new_inv);
3679 if (!best_new_ivs)
3680 return false;
3682 bitmap_copy (ivs, best_new_ivs);
3683 bitmap_copy (inv, best_new_inv);
3684 BITMAP_XFREE (best_new_ivs);
3685 BITMAP_XFREE (best_new_inv);
3686 return true;
3689 /* Attempts to find the optimal set of induction variables. We do simple
3690 greedy heuristic -- we try to replace at most one candidate in the selected
3691 solution and remove the unused ivs while this improves the cost. */
3693 static bitmap
3694 find_optimal_iv_set (struct ivopts_data *data)
3696 unsigned cost, i;
3697 bitmap set = BITMAP_XMALLOC ();
3698 bitmap inv = BITMAP_XMALLOC ();
3699 struct iv_use *use;
3701 /* Set the upper bound. */
3702 cost = get_initial_solution (data, set, inv);
3703 if (cost == INFTY)
3705 if (dump_file && (dump_flags & TDF_DETAILS))
3706 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
3707 BITMAP_XFREE (inv);
3708 BITMAP_XFREE (set);
3709 return NULL;
3712 if (dump_file && (dump_flags & TDF_DETAILS))
3714 fprintf (dump_file, "Initial set of candidates (cost %d): ", cost);
3715 bitmap_print (dump_file, set, "", "");
3716 fprintf (dump_file, " invariants ");
3717 bitmap_print (dump_file, inv, "", "");
3718 fprintf (dump_file, "\n");
3721 while (try_improve_iv_set (data, set, inv, &cost))
3723 if (dump_file && (dump_flags & TDF_DETAILS))
3725 fprintf (dump_file, "Improved to (cost %d): ", cost);
3726 bitmap_print (dump_file, set, "", "");
3727 fprintf (dump_file, " invariants ");
3728 bitmap_print (dump_file, inv, "", "");
3729 fprintf (dump_file, "\n");
3733 if (dump_file && (dump_flags & TDF_DETAILS))
3734 fprintf (dump_file, "Final cost %d\n\n", cost);
3736 for (i = 0; i < n_iv_uses (data); i++)
3738 use = iv_use (data, i);
3739 find_best_candidate (data, use, set, inv, NULL, NULL, &use->selected);
3742 BITMAP_XFREE (inv);
3744 return set;
3747 /* Creates a new induction variable corresponding to CAND. */
3749 static void
3750 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
3752 block_stmt_iterator incr_pos;
3753 tree base;
3754 bool after = false;
3756 if (!cand->iv)
3757 return;
3759 switch (cand->pos)
3761 case IP_NORMAL:
3762 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
3763 break;
3765 case IP_END:
3766 incr_pos = bsi_last (ip_end_pos (data->current_loop));
3767 after = true;
3768 break;
3770 case IP_ORIGINAL:
3771 /* Mark that the iv is preserved. */
3772 name_info (data, cand->var_before)->preserve_biv = true;
3773 name_info (data, cand->var_after)->preserve_biv = true;
3775 /* Rewrite the increment so that it uses var_before directly. */
3776 find_interesting_uses_op (data, cand->var_after)->selected = cand;
3778 return;
3781 gimple_add_tmp_var (cand->var_before);
3782 add_referenced_tmp_var (cand->var_before);
3784 base = unshare_expr (cand->iv->base);
3786 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
3787 &incr_pos, after, &cand->var_before, &cand->var_after);
3790 /* Creates new induction variables described in SET. */
3792 static void
3793 create_new_ivs (struct ivopts_data *data, bitmap set)
3795 unsigned i;
3796 struct iv_cand *cand;
3797 bitmap_iterator bi;
3799 EXECUTE_IF_SET_IN_BITMAP (set, 0, i, bi)
3801 cand = iv_cand (data, i);
3802 create_new_iv (data, cand);
3806 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
3807 is true, remove also the ssa name defined by the statement. */
3809 static void
3810 remove_statement (tree stmt, bool including_defined_name)
3812 if (TREE_CODE (stmt) == PHI_NODE)
3814 if (!including_defined_name)
3816 /* Prevent the ssa name defined by the statement from being removed. */
3817 SET_PHI_RESULT (stmt, NULL);
3819 remove_phi_node (stmt, NULL_TREE, bb_for_stmt (stmt));
3821 else
3823 block_stmt_iterator bsi = stmt_for_bsi (stmt);
3825 bsi_remove (&bsi);
3829 /* Rewrites USE (definition of iv used in a nonlinear expression)
3830 using candidate CAND. */
3832 static void
3833 rewrite_use_nonlinear_expr (struct ivopts_data *data,
3834 struct iv_use *use, struct iv_cand *cand)
3836 tree comp = unshare_expr (get_computation (data->current_loop,
3837 use, cand));
3838 tree op, stmts, tgt, ass;
3839 block_stmt_iterator bsi, pbsi;
3841 switch (TREE_CODE (use->stmt))
3843 case PHI_NODE:
3844 tgt = PHI_RESULT (use->stmt);
3846 /* If we should keep the biv, do not replace it. */
3847 if (name_info (data, tgt)->preserve_biv)
3848 return;
3850 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
3851 while (!bsi_end_p (pbsi)
3852 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
3854 bsi = pbsi;
3855 bsi_next (&pbsi);
3857 break;
3859 case MODIFY_EXPR:
3860 tgt = TREE_OPERAND (use->stmt, 0);
3861 bsi = stmt_for_bsi (use->stmt);
3862 break;
3864 default:
3865 gcc_unreachable ();
3868 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
3870 if (TREE_CODE (use->stmt) == PHI_NODE)
3872 if (stmts)
3873 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
3874 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
3875 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
3876 remove_statement (use->stmt, false);
3877 SSA_NAME_DEF_STMT (tgt) = ass;
3879 else
3881 if (stmts)
3882 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3883 TREE_OPERAND (use->stmt, 1) = op;
3887 /* Replaces ssa name in index IDX by its basic variable. Callback for
3888 for_each_index. */
3890 static bool
3891 idx_remove_ssa_names (tree base, tree *idx,
3892 void *data ATTRIBUTE_UNUSED)
3894 tree *op;
3896 if (TREE_CODE (*idx) == SSA_NAME)
3897 *idx = SSA_NAME_VAR (*idx);
3899 if (TREE_CODE (base) == ARRAY_REF)
3901 op = &TREE_OPERAND (base, 2);
3902 if (*op
3903 && TREE_CODE (*op) == SSA_NAME)
3904 *op = SSA_NAME_VAR (*op);
3905 op = &TREE_OPERAND (base, 3);
3906 if (*op
3907 && TREE_CODE (*op) == SSA_NAME)
3908 *op = SSA_NAME_VAR (*op);
3911 return true;
3914 /* Unshares REF and replaces ssa names inside it by their basic variables. */
3916 static tree
3917 unshare_and_remove_ssa_names (tree ref)
3919 ref = unshare_expr (ref);
3920 for_each_index (&ref, idx_remove_ssa_names, NULL);
3922 return ref;
3925 /* Rewrites base of memory access OP with expression WITH in statement
3926 pointed to by BSI. */
3928 static void
3929 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
3931 tree bvar, var, new_var, new_name, copy, name;
3932 tree orig;
3934 var = bvar = get_base_address (*op);
3936 if (!var || TREE_CODE (with) != SSA_NAME)
3937 goto do_rewrite;
3939 gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
3940 gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
3941 if (TREE_CODE (var) == INDIRECT_REF)
3942 var = TREE_OPERAND (var, 0);
3943 if (TREE_CODE (var) == SSA_NAME)
3945 name = var;
3946 var = SSA_NAME_VAR (var);
3948 else if (DECL_P (var))
3949 name = NULL_TREE;
3950 else
3951 goto do_rewrite;
3953 if (var_ann (var)->type_mem_tag)
3954 var = var_ann (var)->type_mem_tag;
3956 /* We need to add a memory tag for the variable. But we do not want
3957 to add it to the temporary used for the computations, since this leads
3958 to problems in redundancy elimination when there are common parts
3959 in two computations referring to the different arrays. So we copy
3960 the variable to a new temporary. */
3961 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
3962 if (name)
3963 new_name = duplicate_ssa_name (name, copy);
3964 else
3966 new_var = create_tmp_var (TREE_TYPE (with), "ruatmp");
3967 add_referenced_tmp_var (new_var);
3968 var_ann (new_var)->type_mem_tag = var;
3969 new_name = make_ssa_name (new_var, copy);
3971 TREE_OPERAND (copy, 0) = new_name;
3972 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
3973 with = new_name;
3975 do_rewrite:
3977 orig = NULL_TREE;
3978 gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
3979 gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
3981 if (TREE_CODE (*op) == INDIRECT_REF)
3982 orig = REF_ORIGINAL (*op);
3983 if (!orig)
3984 orig = unshare_and_remove_ssa_names (*op);
3986 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
3988 /* Record the original reference, for purposes of alias analysis. */
3989 REF_ORIGINAL (*op) = orig;
3992 /* Rewrites USE (address that is an iv) using candidate CAND. */
3994 static void
3995 rewrite_use_address (struct ivopts_data *data,
3996 struct iv_use *use, struct iv_cand *cand)
3998 tree comp = unshare_expr (get_computation (data->current_loop,
3999 use, cand));
4000 block_stmt_iterator bsi = stmt_for_bsi (use->stmt);
4001 tree stmts;
4002 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
4004 if (stmts)
4005 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4007 rewrite_address_base (&bsi, use->op_p, op);
4010 /* Rewrites USE (the condition such that one of the arguments is an iv) using
4011 candidate CAND. */
4013 static void
4014 rewrite_use_compare (struct ivopts_data *data,
4015 struct iv_use *use, struct iv_cand *cand)
4017 tree comp;
4018 tree *op_p, cond, op, stmts, bound;
4019 block_stmt_iterator bsi = stmt_for_bsi (use->stmt);
4020 enum tree_code compare;
4022 if (may_eliminate_iv (data->current_loop,
4023 use, cand, &compare, &bound))
4025 op = force_gimple_operand (unshare_expr (bound), &stmts,
4026 true, NULL_TREE);
4028 if (stmts)
4029 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4031 *use->op_p = build2 (compare, boolean_type_node,
4032 var_at_stmt (data->current_loop,
4033 cand, use->stmt), op);
4034 modify_stmt (use->stmt);
4035 return;
4038 /* The induction variable elimination failed; just express the original
4039 giv. */
4040 comp = unshare_expr (get_computation (data->current_loop, use, cand));
4042 cond = *use->op_p;
4043 op_p = &TREE_OPERAND (cond, 0);
4044 if (TREE_CODE (*op_p) != SSA_NAME
4045 || zero_p (get_iv (data, *op_p)->step))
4046 op_p = &TREE_OPERAND (cond, 1);
4048 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
4049 if (stmts)
4050 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4052 *op_p = op;
4055 /* Ensure that operand *OP_P may be used at the end of EXIT without
4056 violating loop closed ssa form. */
4058 static void
4059 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
4061 basic_block def_bb;
4062 struct loop *def_loop;
4063 tree phi, use;
4065 use = USE_FROM_PTR (op_p);
4066 if (TREE_CODE (use) != SSA_NAME)
4067 return;
4069 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
4070 if (!def_bb)
4071 return;
4073 def_loop = def_bb->loop_father;
4074 if (flow_bb_inside_loop_p (def_loop, exit->dest))
4075 return;
4077 /* Try finding a phi node that copies the value out of the loop. */
4078 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
4079 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
4080 break;
4082 if (!phi)
4084 /* Create such a phi node. */
4085 tree new_name = duplicate_ssa_name (use, NULL);
4087 phi = create_phi_node (new_name, exit->dest);
4088 SSA_NAME_DEF_STMT (new_name) = phi;
4089 add_phi_arg (&phi, use, exit);
4092 SET_USE (op_p, PHI_RESULT (phi));
4095 /* Ensure that operands of STMT may be used at the end of EXIT without
4096 violating loop closed ssa form. */
4098 static void
4099 protect_loop_closed_ssa_form (edge exit, tree stmt)
4101 use_optype uses;
4102 vuse_optype vuses;
4103 v_may_def_optype v_may_defs;
4104 unsigned i;
4106 get_stmt_operands (stmt);
4108 uses = STMT_USE_OPS (stmt);
4109 for (i = 0; i < NUM_USES (uses); i++)
4110 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4112 vuses = STMT_VUSE_OPS (stmt);
4113 for (i = 0; i < NUM_VUSES (vuses); i++)
4114 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
4116 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4117 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
4118 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
4121 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
4122 so that they are emitted on the correct place, and so that the loop closed
4123 ssa form is preserved. */
4125 static void
4126 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
4128 tree_stmt_iterator tsi;
4129 block_stmt_iterator bsi;
4130 tree phi, stmt, def, next;
4132 if (EDGE_COUNT (exit->dest->preds) > 1)
4133 split_loop_exit_edge (exit);
4135 if (TREE_CODE (stmts) == STATEMENT_LIST)
4137 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
4138 protect_loop_closed_ssa_form (exit, tsi_stmt (tsi));
4140 else
4141 protect_loop_closed_ssa_form (exit, stmts);
4143 /* Ensure there is label in exit->dest, so that we can
4144 insert after it. */
4145 tree_block_label (exit->dest);
4146 bsi = bsi_after_labels (exit->dest);
4147 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4149 if (!op)
4150 return;
4152 for (phi = phi_nodes (exit->dest); phi; phi = next)
4154 next = TREE_CHAIN (phi);
4156 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
4158 def = PHI_RESULT (phi);
4159 remove_statement (phi, false);
4160 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
4161 def, op);
4162 SSA_NAME_DEF_STMT (def) = stmt;
4163 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
4168 /* Rewrites the final value of USE (that is only needed outside of the loop)
4169 using candidate CAND. */
4171 static void
4172 rewrite_use_outer (struct ivopts_data *data,
4173 struct iv_use *use, struct iv_cand *cand)
4175 edge exit;
4176 tree value, op, stmts, tgt;
4177 tree phi;
4179 switch (TREE_CODE (use->stmt))
4181 case PHI_NODE:
4182 tgt = PHI_RESULT (use->stmt);
4183 break;
4184 case MODIFY_EXPR:
4185 tgt = TREE_OPERAND (use->stmt, 0);
4186 break;
4187 default:
4188 gcc_unreachable ();
4191 exit = single_dom_exit (data->current_loop);
4193 if (exit)
4195 if (!cand->iv)
4197 bool ok = may_replace_final_value (data->current_loop, use, &value);
4198 gcc_assert (ok);
4200 else
4201 value = get_computation_at (data->current_loop,
4202 use, cand, last_stmt (exit->src));
4204 value = unshare_expr (value);
4205 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
4207 /* If we will preserve the iv anyway and we would need to perform
4208 some computation to replace the final value, do nothing. */
4209 if (stmts && name_info (data, tgt)->preserve_biv)
4210 return;
4212 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
4214 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
4216 if (USE_FROM_PTR (use_p) == tgt)
4217 SET_USE (use_p, op);
4220 if (stmts)
4221 compute_phi_arg_on_exit (exit, stmts, op);
4223 /* Enable removal of the statement. We cannot remove it directly,
4224 since we may still need the aliasing information attached to the
4225 ssa name defined by it. */
4226 name_info (data, tgt)->iv->have_use_for = false;
4227 return;
4230 /* If the variable is going to be preserved anyway, there is nothing to
4231 do. */
4232 if (name_info (data, tgt)->preserve_biv)
4233 return;
4235 /* Otherwise we just need to compute the iv. */
4236 rewrite_use_nonlinear_expr (data, use, cand);
4239 /* Rewrites USE using candidate CAND. */
4241 static void
4242 rewrite_use (struct ivopts_data *data,
4243 struct iv_use *use, struct iv_cand *cand)
4245 switch (use->type)
4247 case USE_NONLINEAR_EXPR:
4248 rewrite_use_nonlinear_expr (data, use, cand);
4249 break;
4251 case USE_OUTER:
4252 rewrite_use_outer (data, use, cand);
4253 break;
4255 case USE_ADDRESS:
4256 rewrite_use_address (data, use, cand);
4257 break;
4259 case USE_COMPARE:
4260 rewrite_use_compare (data, use, cand);
4261 break;
4263 default:
4264 gcc_unreachable ();
4266 modify_stmt (use->stmt);
4269 /* Rewrite the uses using the selected induction variables. */
4271 static void
4272 rewrite_uses (struct ivopts_data *data)
4274 unsigned i;
4275 struct iv_cand *cand;
4276 struct iv_use *use;
4278 for (i = 0; i < n_iv_uses (data); i++)
4280 use = iv_use (data, i);
4281 cand = use->selected;
4282 gcc_assert (cand);
4284 rewrite_use (data, use, cand);
4288 /* Removes the ivs that are not used after rewriting. */
4290 static void
4291 remove_unused_ivs (struct ivopts_data *data)
4293 unsigned j;
4294 bitmap_iterator bi;
4296 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4298 struct version_info *info;
4300 info = ver_info (data, j);
4301 if (info->iv
4302 && !zero_p (info->iv->step)
4303 && !info->inv_id
4304 && !info->iv->have_use_for
4305 && !info->preserve_biv)
4306 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
4310 /* Frees data allocated by the optimization of a single loop. */
4312 static void
4313 free_loop_data (struct ivopts_data *data)
4315 unsigned i, j;
4316 bitmap_iterator bi;
4318 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
4320 struct version_info *info;
4322 info = ver_info (data, i);
4323 if (info->iv)
4324 free (info->iv);
4325 info->iv = NULL;
4326 info->has_nonlin_use = false;
4327 info->preserve_biv = false;
4328 info->inv_id = 0;
4330 bitmap_clear (data->relevant);
4332 for (i = 0; i < n_iv_uses (data); i++)
4334 struct iv_use *use = iv_use (data, i);
4336 free (use->iv);
4337 BITMAP_XFREE (use->related_cands);
4338 for (j = 0; j < use->n_map_members; j++)
4339 if (use->cost_map[j].depends_on)
4340 BITMAP_XFREE (use->cost_map[j].depends_on);
4341 free (use->cost_map);
4342 free (use);
4344 VARRAY_POP_ALL (data->iv_uses);
4346 for (i = 0; i < n_iv_cands (data); i++)
4348 struct iv_cand *cand = iv_cand (data, i);
4350 if (cand->iv)
4351 free (cand->iv);
4352 free (cand);
4354 VARRAY_POP_ALL (data->iv_candidates);
4356 if (data->version_info_size < num_ssa_names)
4358 data->version_info_size = 2 * num_ssa_names;
4359 free (data->version_info);
4360 data->version_info = xcalloc (data->version_info_size,
4361 sizeof (struct version_info));
4364 data->max_inv_id = 0;
4366 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
4368 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
4370 SET_DECL_RTL (obj, NULL_RTX);
4372 VARRAY_POP_ALL (decl_rtl_to_reset);
4375 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
4376 loop tree. */
4378 static void
4379 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
4381 unsigned i;
4383 for (i = 1; i < loops->num; i++)
4384 if (loops->parray[i])
4386 free (loops->parray[i]->aux);
4387 loops->parray[i]->aux = NULL;
4390 free_loop_data (data);
4391 free (data->version_info);
4392 BITMAP_XFREE (data->relevant);
4394 VARRAY_FREE (decl_rtl_to_reset);
4395 VARRAY_FREE (data->iv_uses);
4396 VARRAY_FREE (data->iv_candidates);
4399 /* Optimizes the LOOP. Returns true if anything changed. */
4401 static bool
4402 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
4404 bool changed = false;
4405 bitmap iv_set;
4406 edge exit;
4408 data->current_loop = loop;
4410 if (dump_file && (dump_flags & TDF_DETAILS))
4412 fprintf (dump_file, "Processing loop %d\n", loop->num);
4414 exit = single_dom_exit (loop);
4415 if (exit)
4417 fprintf (dump_file, " single exit %d -> %d, exit condition ",
4418 exit->src->index, exit->dest->index);
4419 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
4420 fprintf (dump_file, "\n");
4423 fprintf (dump_file, "\n");
4426 /* For each ssa name determines whether it behaves as an induction variable
4427 in some loop. */
4428 if (!find_induction_variables (data))
4429 goto finish;
4431 /* Finds interesting uses (item 1). */
4432 find_interesting_uses (data);
4433 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
4434 goto finish;
4436 /* Finds candidates for the induction variables (item 2). */
4437 find_iv_candidates (data);
4439 /* Calculates the costs (item 3, part 1). */
4440 determine_use_iv_costs (data);
4441 determine_iv_costs (data);
4442 determine_set_costs (data);
4444 /* Find the optimal set of induction variables (item 3, part 2). */
4445 iv_set = find_optimal_iv_set (data);
4446 if (!iv_set)
4447 goto finish;
4448 changed = true;
4450 /* Create the new induction variables (item 4, part 1). */
4451 create_new_ivs (data, iv_set);
4453 /* Rewrite the uses (item 4, part 2). */
4454 rewrite_uses (data);
4456 /* Remove the ivs that are unused after rewriting. */
4457 remove_unused_ivs (data);
4459 loop_commit_inserts ();
4461 BITMAP_XFREE (iv_set);
4463 /* We have changed the structure of induction variables; it might happen
4464 that definitions in the scev database refer to some of them that were
4465 eliminated. */
4466 scev_reset ();
4468 finish:
4469 free_loop_data (data);
4471 return changed;
4474 /* Main entry point. Optimizes induction variables in LOOPS. */
4476 void
4477 tree_ssa_iv_optimize (struct loops *loops)
4479 struct loop *loop;
4480 struct ivopts_data data;
4482 tree_ssa_iv_optimize_init (loops, &data);
4484 /* Optimize the loops starting with the innermost ones. */
4485 loop = loops->tree_root;
4486 while (loop->inner)
4487 loop = loop->inner;
4489 #ifdef ENABLE_CHECKING
4490 verify_loop_closed_ssa ();
4491 verify_stmts ();
4492 #endif
4494 /* Scan the loops, inner ones first. */
4495 while (loop != loops->tree_root)
4497 if (dump_file && (dump_flags & TDF_DETAILS))
4498 flow_loop_dump (loop, dump_file, NULL, 1);
4499 if (tree_ssa_iv_optimize_loop (&data, loop))
4501 #ifdef ENABLE_CHECKING
4502 verify_loop_closed_ssa ();
4503 verify_stmts ();
4504 #endif
4507 if (loop->next)
4509 loop = loop->next;
4510 while (loop->inner)
4511 loop = loop->inner;
4513 else
4514 loop = loop->outer;
4517 tree_ssa_iv_optimize_finalize (loops, &data);