* varasm.c (default_assemble_integer): Return false for values wider
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blob078e8a36cf9f7f9286d5da1c6d077c0ad2dc08b9
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
92 /* The infinite cost. */
93 #define INFTY 10000000
95 /* The expected number of loop iterations. TODO -- use profiling instead of
96 this. */
97 #define AVG_LOOP_NITER(LOOP) 5
99 /* Just to shorten the ugly names. */
100 #define EXEC_BINARY nondestructive_fold_binary_to_constant
101 #define EXEC_UNARY nondestructive_fold_unary_to_constant
103 /* Representation of the induction variable. */
104 struct iv
106 tree base; /* Initial value of the iv. */
107 tree step; /* Step of the iv (constant only). */
108 tree ssa_name; /* The ssa name with the value. */
109 bool biv_p; /* Is it a biv? */
110 bool have_use_for; /* Do we already have a use for it? */
111 unsigned use_id; /* The identifier in the use if it is the case. */
114 /* Per-ssa version information (induction variable descriptions, etc.). */
115 struct version_info
117 tree name; /* The ssa name. */
118 struct iv *iv; /* Induction variable description. */
119 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
120 an expression that is not an induction variable. */
121 unsigned inv_id; /* Id of an invariant. */
122 bool preserve_biv; /* For the original biv, whether to preserve it. */
125 /* Information attached to loop. */
126 struct loop_data
128 struct tree_niter_desc niter;
129 /* Number of iterations. */
131 unsigned regs_used; /* Number of registers used. */
134 /* Types of uses. */
135 enum use_type
137 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
138 USE_OUTER, /* The induction variable is used outside the loop. */
139 USE_ADDRESS, /* Use in an address. */
140 USE_COMPARE /* Use is a compare. */
143 /* The candidate - cost pair. */
144 struct cost_pair
146 struct iv_cand *cand; /* The candidate. */
147 unsigned cost; /* The cost. */
148 bitmap depends_on; /* The list of invariants that have to be
149 preserved. */
152 /* Use. */
153 struct iv_use
155 unsigned id; /* The id of the use. */
156 enum use_type type; /* Type of the use. */
157 struct iv *iv; /* The induction variable it is based on. */
158 tree stmt; /* Statement in that it occurs. */
159 tree *op_p; /* The place where it occurs. */
160 bitmap related_cands; /* The set of "related" iv candidates. */
162 unsigned n_map_members; /* Number of candidates in the cost_map list. */
163 struct cost_pair *cost_map;
164 /* The costs wrto the iv candidates. */
166 struct iv_cand *selected;
167 /* The selected candidate. */
170 /* The position where the iv is computed. */
171 enum iv_position
173 IP_NORMAL, /* At the end, just before the exit condition. */
174 IP_END, /* At the end of the latch block. */
175 IP_ORIGINAL /* The original biv. */
178 /* The induction variable candidate. */
179 struct iv_cand
181 unsigned id; /* The number of the candidate. */
182 bool important; /* Whether this is an "important" candidate, i.e. such
183 that it should be considered by all uses. */
184 enum iv_position pos; /* Where it is computed. */
185 tree incremented_at; /* For original biv, the statement where it is
186 incremented. */
187 tree var_before; /* The variable used for it before increment. */
188 tree var_after; /* The variable used for it after increment. */
189 struct iv *iv; /* The value of the candidate. NULL for
190 "pseudocandidate" used to indicate the possibility
191 to replace the final value of an iv by direct
192 computation of the value. */
193 unsigned cost; /* Cost of the candidate. */
196 /* The data used by the induction variable optimizations. */
198 struct ivopts_data
200 /* The currently optimized loop. */
201 struct loop *current_loop;
203 /* The size of version_info array allocated. */
204 unsigned version_info_size;
206 /* The array of information for the ssa names. */
207 struct version_info *version_info;
209 /* The bitmap of indices in version_info whose value was changed. */
210 bitmap relevant;
212 /* The maximum invariant id. */
213 unsigned max_inv_id;
215 /* The uses of induction variables. */
216 varray_type iv_uses;
218 /* The candidates. */
219 varray_type iv_candidates;
221 /* Whether to consider just related and important candidates when replacing a
222 use. */
223 bool consider_all_candidates;
226 /* Bound on number of candidates below that all candidates are considered. */
228 #define CONSIDER_ALL_CANDIDATES_BOUND \
229 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
231 /* If there are more iv occurrences, we just give up (it is quite unlikely that
232 optimizing such a loop would help, and it would take ages). */
234 #define MAX_CONSIDERED_USES \
235 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
237 /* The list of trees for that the decl_rtl field must be reset is stored
238 here. */
240 static varray_type decl_rtl_to_reset;
242 /* Number of uses recorded in DATA. */
244 static inline unsigned
245 n_iv_uses (struct ivopts_data *data)
247 return VARRAY_ACTIVE_SIZE (data->iv_uses);
250 /* Ith use recorded in DATA. */
252 static inline struct iv_use *
253 iv_use (struct ivopts_data *data, unsigned i)
255 return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
258 /* Number of candidates recorded in DATA. */
260 static inline unsigned
261 n_iv_cands (struct ivopts_data *data)
263 return VARRAY_ACTIVE_SIZE (data->iv_candidates);
266 /* Ith candidate recorded in DATA. */
268 static inline struct iv_cand *
269 iv_cand (struct ivopts_data *data, unsigned i)
271 return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
274 /* The data for LOOP. */
276 static inline struct loop_data *
277 loop_data (struct loop *loop)
279 return loop->aux;
282 /* The single loop exit if it dominates the latch, NULL otherwise. */
284 static edge
285 single_dom_exit (struct loop *loop)
287 edge exit = loop->single_exit;
289 if (!exit)
290 return NULL;
292 if (!just_once_each_iteration_p (loop, exit->src))
293 return NULL;
295 return exit;
298 /* Dumps information about the induction variable IV to FILE. */
300 extern void dump_iv (FILE *, struct iv *);
301 void
302 dump_iv (FILE *file, struct iv *iv)
304 fprintf (file, "ssa name ");
305 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
306 fprintf (file, "\n");
308 fprintf (file, " type ");
309 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
310 fprintf (file, "\n");
312 if (iv->step)
314 fprintf (file, " base ");
315 print_generic_expr (file, iv->base, TDF_SLIM);
316 fprintf (file, "\n");
318 fprintf (file, " step ");
319 print_generic_expr (file, iv->step, TDF_SLIM);
320 fprintf (file, "\n");
322 else
324 fprintf (file, " invariant ");
325 print_generic_expr (file, iv->base, TDF_SLIM);
326 fprintf (file, "\n");
329 if (iv->biv_p)
330 fprintf (file, " is a biv\n");
333 /* Dumps information about the USE to FILE. */
335 extern void dump_use (FILE *, struct iv_use *);
336 void
337 dump_use (FILE *file, struct iv_use *use)
339 struct iv *iv = use->iv;
341 fprintf (file, "use %d\n", use->id);
343 switch (use->type)
345 case USE_NONLINEAR_EXPR:
346 fprintf (file, " generic\n");
347 break;
349 case USE_OUTER:
350 fprintf (file, " outside\n");
351 break;
353 case USE_ADDRESS:
354 fprintf (file, " address\n");
355 break;
357 case USE_COMPARE:
358 fprintf (file, " compare\n");
359 break;
361 default:
362 gcc_unreachable ();
365 fprintf (file, " in statement ");
366 print_generic_expr (file, use->stmt, TDF_SLIM);
367 fprintf (file, "\n");
369 fprintf (file, " at position ");
370 if (use->op_p)
371 print_generic_expr (file, *use->op_p, TDF_SLIM);
372 fprintf (file, "\n");
374 fprintf (file, " type ");
375 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
376 fprintf (file, "\n");
378 if (iv->step)
380 fprintf (file, " base ");
381 print_generic_expr (file, iv->base, TDF_SLIM);
382 fprintf (file, "\n");
384 fprintf (file, " step ");
385 print_generic_expr (file, iv->step, TDF_SLIM);
386 fprintf (file, "\n");
388 else
390 fprintf (file, " invariant ");
391 print_generic_expr (file, iv->base, TDF_SLIM);
392 fprintf (file, "\n");
395 fprintf (file, " related candidates ");
396 dump_bitmap (file, use->related_cands);
399 /* Dumps information about the uses to FILE. */
401 extern void dump_uses (FILE *, struct ivopts_data *);
402 void
403 dump_uses (FILE *file, struct ivopts_data *data)
405 unsigned i;
406 struct iv_use *use;
408 for (i = 0; i < n_iv_uses (data); i++)
410 use = iv_use (data, i);
412 dump_use (file, use);
413 fprintf (file, "\n");
417 /* Dumps information about induction variable candidate CAND to FILE. */
419 extern void dump_cand (FILE *, struct iv_cand *);
420 void
421 dump_cand (FILE *file, struct iv_cand *cand)
423 struct iv *iv = cand->iv;
425 fprintf (file, "candidate %d%s\n",
426 cand->id, cand->important ? " (important)" : "");
428 if (!iv)
430 fprintf (file, " final value replacement\n");
431 return;
434 switch (cand->pos)
436 case IP_NORMAL:
437 fprintf (file, " incremented before exit test\n");
438 break;
440 case IP_END:
441 fprintf (file, " incremented at end\n");
442 break;
444 case IP_ORIGINAL:
445 fprintf (file, " original biv\n");
446 break;
449 fprintf (file, " type ");
450 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
451 fprintf (file, "\n");
453 if (iv->step)
455 fprintf (file, " base ");
456 print_generic_expr (file, iv->base, TDF_SLIM);
457 fprintf (file, "\n");
459 fprintf (file, " step ");
460 print_generic_expr (file, iv->step, TDF_SLIM);
461 fprintf (file, "\n");
463 else
465 fprintf (file, " invariant ");
466 print_generic_expr (file, iv->base, TDF_SLIM);
467 fprintf (file, "\n");
471 /* Returns the info for ssa version VER. */
473 static inline struct version_info *
474 ver_info (struct ivopts_data *data, unsigned ver)
476 return data->version_info + ver;
479 /* Returns the info for ssa name NAME. */
481 static inline struct version_info *
482 name_info (struct ivopts_data *data, tree name)
484 return ver_info (data, SSA_NAME_VERSION (name));
487 /* Checks whether there exists number X such that X * B = A, counting modulo
488 2^BITS. */
490 static bool
491 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
492 HOST_WIDE_INT *x)
494 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
495 unsigned HOST_WIDE_INT inv, ex, val;
496 unsigned i;
498 a &= mask;
499 b &= mask;
501 /* First divide the whole equation by 2 as long as possible. */
502 while (!(a & 1) && !(b & 1))
504 a >>= 1;
505 b >>= 1;
506 bits--;
507 mask >>= 1;
510 if (!(b & 1))
512 /* If b is still even, a is odd and there is no such x. */
513 return false;
516 /* Find the inverse of b. We compute it as
517 b^(2^(bits - 1) - 1) (mod 2^bits). */
518 inv = 1;
519 ex = b;
520 for (i = 0; i < bits - 1; i++)
522 inv = (inv * ex) & mask;
523 ex = (ex * ex) & mask;
526 val = (a * inv) & mask;
528 gcc_assert (((val * b) & mask) == a);
530 if ((val >> (bits - 1)) & 1)
531 val |= ~mask;
533 *x = val;
535 return true;
538 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
539 emitted in LOOP. */
541 static bool
542 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
544 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
546 gcc_assert (bb);
548 if (sbb == loop->latch)
549 return true;
551 if (sbb != bb)
552 return false;
554 return stmt == last_stmt (bb);
557 /* Returns true if STMT if after the place where the original induction
558 variable CAND is incremented. */
560 static bool
561 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
563 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
564 basic_block stmt_bb = bb_for_stmt (stmt);
565 block_stmt_iterator bsi;
567 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
568 return false;
570 if (stmt_bb != cand_bb)
571 return true;
573 /* Scan the block from the end, since the original ivs are usually
574 incremented at the end of the loop body. */
575 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
577 if (bsi_stmt (bsi) == cand->incremented_at)
578 return false;
579 if (bsi_stmt (bsi) == stmt)
580 return true;
584 /* Returns true if STMT if after the place where the induction variable
585 CAND is incremented in LOOP. */
587 static bool
588 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
590 switch (cand->pos)
592 case IP_END:
593 return false;
595 case IP_NORMAL:
596 return stmt_after_ip_normal_pos (loop, stmt);
598 case IP_ORIGINAL:
599 return stmt_after_ip_original_pos (cand, stmt);
601 default:
602 gcc_unreachable ();
606 /* Initializes data structures used by the iv optimization pass, stored
607 in DATA. LOOPS is the loop tree. */
609 static void
610 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
612 unsigned i;
614 data->version_info_size = 2 * num_ssa_names;
615 data->version_info = xcalloc (data->version_info_size,
616 sizeof (struct version_info));
617 data->relevant = BITMAP_XMALLOC ();
618 data->max_inv_id = 0;
620 for (i = 1; i < loops->num; i++)
621 if (loops->parray[i])
622 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
624 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
625 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
626 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
629 /* Allocates an induction variable with given initial value BASE and step STEP
630 for loop LOOP. */
632 static struct iv *
633 alloc_iv (tree base, tree step)
635 struct iv *iv = xcalloc (1, sizeof (struct iv));
637 if (step && integer_zerop (step))
638 step = NULL_TREE;
640 iv->base = base;
641 iv->step = step;
642 iv->biv_p = false;
643 iv->have_use_for = false;
644 iv->use_id = 0;
645 iv->ssa_name = NULL_TREE;
647 return iv;
650 /* Sets STEP and BASE for induction variable IV. */
652 static void
653 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
655 struct version_info *info = name_info (data, iv);
657 gcc_assert (!info->iv);
659 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
660 info->iv = alloc_iv (base, step);
661 info->iv->ssa_name = iv;
664 /* Finds induction variable declaration for VAR. */
666 static struct iv *
667 get_iv (struct ivopts_data *data, tree var)
669 basic_block bb;
671 if (!name_info (data, var)->iv)
673 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
675 if (!bb
676 || !flow_bb_inside_loop_p (data->current_loop, bb))
677 set_iv (data, var, var, NULL_TREE);
680 return name_info (data, var)->iv;
683 /* Determines the step of a biv defined in PHI. */
685 static tree
686 determine_biv_step (tree phi)
688 struct loop *loop = bb_for_stmt (phi)->loop_father;
689 tree name = PHI_RESULT (phi), base, step;
690 tree type = TREE_TYPE (name);
692 if (!is_gimple_reg (name))
693 return NULL_TREE;
695 if (!simple_iv (loop, phi, name, &base, &step))
696 return NULL_TREE;
698 if (!step)
699 return build_int_cst (type, 0);
701 return step;
704 /* Returns false if INDEX is a ssa name that occurs in an
705 abnormal phi node. Callback for for_each_index. */
707 static bool
708 idx_contains_abnormal_ssa_name_p (tree base ATTRIBUTE_UNUSED, tree *index,
709 void *data ATTRIBUTE_UNUSED)
711 if (TREE_CODE (*index) != SSA_NAME)
712 return true;
714 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*index) == 0;
717 /* Returns true if EXPR contains a ssa name that occurs in an
718 abnormal phi node. */
720 static bool
721 contains_abnormal_ssa_name_p (tree expr)
723 enum tree_code code = TREE_CODE (expr);
724 enum tree_code_class class = TREE_CODE_CLASS (code);
726 if (code == SSA_NAME)
727 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
729 if (code == INTEGER_CST
730 || is_gimple_min_invariant (expr))
731 return false;
733 if (code == ADDR_EXPR)
734 return !for_each_index (&TREE_OPERAND (expr, 1),
735 idx_contains_abnormal_ssa_name_p,
736 NULL);
738 switch (class)
740 case tcc_binary:
741 case tcc_comparison:
742 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
743 return true;
745 /* Fallthru. */
746 case tcc_unary:
747 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
748 return true;
750 break;
752 default:
753 gcc_unreachable ();
756 return false;
759 /* Finds basic ivs. */
761 static bool
762 find_bivs (struct ivopts_data *data)
764 tree phi, step, type, base;
765 bool found = false;
766 struct loop *loop = data->current_loop;
768 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
770 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
771 continue;
773 step = determine_biv_step (phi);
775 if (!step)
776 continue;
777 if (cst_and_fits_in_hwi (step)
778 && int_cst_value (step) == 0)
779 continue;
781 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
782 if (contains_abnormal_ssa_name_p (base))
783 continue;
785 type = TREE_TYPE (PHI_RESULT (phi));
786 base = fold_convert (type, base);
787 step = fold_convert (type, step);
789 /* FIXME: We do not handle induction variables whose step does
790 not satisfy cst_and_fits_in_hwi. */
791 if (!cst_and_fits_in_hwi (step))
792 continue;
794 set_iv (data, PHI_RESULT (phi), base, step);
795 found = true;
798 return found;
801 /* Marks basic ivs. */
803 static void
804 mark_bivs (struct ivopts_data *data)
806 tree phi, var;
807 struct iv *iv, *incr_iv;
808 struct loop *loop = data->current_loop;
809 basic_block incr_bb;
811 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
813 iv = get_iv (data, PHI_RESULT (phi));
814 if (!iv)
815 continue;
817 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
818 incr_iv = get_iv (data, var);
819 if (!incr_iv)
820 continue;
822 /* If the increment is in the subloop, ignore it. */
823 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
824 if (incr_bb->loop_father != data->current_loop
825 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
826 continue;
828 iv->biv_p = true;
829 incr_iv->biv_p = true;
833 /* Checks whether STMT defines a linear induction variable and stores its
834 parameters to BASE and STEP. */
836 static bool
837 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
838 tree *base, tree *step)
840 tree lhs;
841 struct loop *loop = data->current_loop;
843 *base = NULL_TREE;
844 *step = NULL_TREE;
846 if (TREE_CODE (stmt) != MODIFY_EXPR)
847 return false;
849 lhs = TREE_OPERAND (stmt, 0);
850 if (TREE_CODE (lhs) != SSA_NAME)
851 return false;
853 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
854 return false;
856 /* FIXME: We do not handle induction variables whose step does
857 not satisfy cst_and_fits_in_hwi. */
858 if (!zero_p (*step)
859 && !cst_and_fits_in_hwi (*step))
860 return false;
862 if (contains_abnormal_ssa_name_p (*base))
863 return false;
865 return true;
868 /* Finds general ivs in statement STMT. */
870 static void
871 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
873 tree base, step;
875 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
876 return;
878 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
881 /* Finds general ivs in basic block BB. */
883 static void
884 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
886 block_stmt_iterator bsi;
888 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
889 find_givs_in_stmt (data, bsi_stmt (bsi));
892 /* Finds general ivs. */
894 static void
895 find_givs (struct ivopts_data *data)
897 struct loop *loop = data->current_loop;
898 basic_block *body = get_loop_body_in_dom_order (loop);
899 unsigned i;
901 for (i = 0; i < loop->num_nodes; i++)
902 find_givs_in_bb (data, body[i]);
903 free (body);
906 /* Determine the number of iterations of the current loop. */
908 static void
909 determine_number_of_iterations (struct ivopts_data *data)
911 struct loop *loop = data->current_loop;
912 edge exit = single_dom_exit (loop);
914 if (!exit)
915 return;
917 number_of_iterations_exit (loop, exit, &loop_data (loop)->niter);
920 /* For each ssa name defined in LOOP determines whether it is an induction
921 variable and if so, its initial value and step. */
923 static bool
924 find_induction_variables (struct ivopts_data *data)
926 unsigned i;
927 struct loop *loop = data->current_loop;
929 if (!find_bivs (data))
930 return false;
932 find_givs (data);
933 mark_bivs (data);
934 determine_number_of_iterations (data);
936 if (dump_file && (dump_flags & TDF_DETAILS))
938 if (loop_data (loop)->niter.niter)
940 fprintf (dump_file, " number of iterations ");
941 print_generic_expr (dump_file, loop_data (loop)->niter.niter,
942 TDF_SLIM);
943 fprintf (dump_file, "\n");
945 fprintf (dump_file, " may be zero if ");
946 print_generic_expr (dump_file, loop_data (loop)->niter.may_be_zero,
947 TDF_SLIM);
948 fprintf (dump_file, "\n");
950 fprintf (dump_file, " bogus unless ");
951 print_generic_expr (dump_file, loop_data (loop)->niter.assumptions,
952 TDF_SLIM);
953 fprintf (dump_file, "\n");
954 fprintf (dump_file, "\n");
957 fprintf (dump_file, "Induction variables:\n\n");
959 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
961 if (ver_info (data, i)->iv)
962 dump_iv (dump_file, ver_info (data, i)->iv);
966 return true;
969 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
971 static struct iv_use *
972 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
973 tree stmt, enum use_type use_type)
975 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
977 use->id = n_iv_uses (data);
978 use->type = use_type;
979 use->iv = iv;
980 use->stmt = stmt;
981 use->op_p = use_p;
982 use->related_cands = BITMAP_XMALLOC ();
984 if (dump_file && (dump_flags & TDF_DETAILS))
985 dump_use (dump_file, use);
987 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
989 return use;
992 /* Checks whether OP is a loop-level invariant and if so, records it.
993 NONLINEAR_USE is true if the invariant is used in a way we do not
994 handle specially. */
996 static void
997 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
999 basic_block bb;
1000 struct version_info *info;
1002 if (TREE_CODE (op) != SSA_NAME
1003 || !is_gimple_reg (op))
1004 return;
1006 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1007 if (bb
1008 && flow_bb_inside_loop_p (data->current_loop, bb))
1009 return;
1011 info = name_info (data, op);
1012 info->name = op;
1013 info->has_nonlin_use |= nonlinear_use;
1014 if (!info->inv_id)
1015 info->inv_id = ++data->max_inv_id;
1016 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1019 /* Checks whether the use OP is interesting and if so, records it
1020 as TYPE. */
1022 static struct iv_use *
1023 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1024 enum use_type type)
1026 struct iv *iv;
1027 struct iv *civ;
1028 tree stmt;
1029 struct iv_use *use;
1031 if (TREE_CODE (op) != SSA_NAME)
1032 return NULL;
1034 iv = get_iv (data, op);
1035 if (!iv)
1036 return NULL;
1038 if (iv->have_use_for)
1040 use = iv_use (data, iv->use_id);
1042 gcc_assert (use->type == USE_NONLINEAR_EXPR
1043 || use->type == USE_OUTER);
1045 if (type == USE_NONLINEAR_EXPR)
1046 use->type = USE_NONLINEAR_EXPR;
1047 return use;
1050 if (zero_p (iv->step))
1052 record_invariant (data, op, true);
1053 return NULL;
1055 iv->have_use_for = true;
1057 civ = xmalloc (sizeof (struct iv));
1058 *civ = *iv;
1060 stmt = SSA_NAME_DEF_STMT (op);
1061 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1062 || TREE_CODE (stmt) == MODIFY_EXPR);
1064 use = record_use (data, NULL, civ, stmt, type);
1065 iv->use_id = use->id;
1067 return use;
1070 /* Checks whether the use OP is interesting and if so, records it. */
1072 static struct iv_use *
1073 find_interesting_uses_op (struct ivopts_data *data, tree op)
1075 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1078 /* Records a definition of induction variable OP that is used outside of the
1079 loop. */
1081 static struct iv_use *
1082 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1084 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1087 /* Checks whether the condition *COND_P in STMT is interesting
1088 and if so, records it. */
1090 static void
1091 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1093 tree *op0_p;
1094 tree *op1_p;
1095 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1096 struct iv const_iv;
1097 tree zero = integer_zero_node;
1099 const_iv.step = NULL_TREE;
1101 if (integer_zerop (*cond_p)
1102 || integer_nonzerop (*cond_p))
1103 return;
1105 if (TREE_CODE (*cond_p) == SSA_NAME)
1107 op0_p = cond_p;
1108 op1_p = &zero;
1110 else
1112 op0_p = &TREE_OPERAND (*cond_p, 0);
1113 op1_p = &TREE_OPERAND (*cond_p, 1);
1116 if (TREE_CODE (*op0_p) == SSA_NAME)
1117 iv0 = get_iv (data, *op0_p);
1118 else
1119 iv0 = &const_iv;
1121 if (TREE_CODE (*op1_p) == SSA_NAME)
1122 iv1 = get_iv (data, *op1_p);
1123 else
1124 iv1 = &const_iv;
1126 if (/* When comparing with non-invariant value, we may not do any senseful
1127 induction variable elimination. */
1128 (!iv0 || !iv1)
1129 /* Eliminating condition based on two ivs would be nontrivial.
1130 ??? TODO -- it is not really important to handle this case. */
1131 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1133 find_interesting_uses_op (data, *op0_p);
1134 find_interesting_uses_op (data, *op1_p);
1135 return;
1138 if (zero_p (iv0->step) && zero_p (iv1->step))
1140 /* If both are invariants, this is a work for unswitching. */
1141 return;
1144 civ = xmalloc (sizeof (struct iv));
1145 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1146 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1149 /* Cumulates the steps of indices into DATA and replaces their values with the
1150 initial ones. Returns false when the value of the index cannot be determined.
1151 Callback for for_each_index. */
1153 struct ifs_ivopts_data
1155 struct ivopts_data *ivopts_data;
1156 tree stmt;
1157 tree *step_p;
1160 static bool
1161 idx_find_step (tree base, tree *idx, void *data)
1163 struct ifs_ivopts_data *dta = data;
1164 struct iv *iv;
1165 tree step, type, iv_type, iv_step, lbound;
1166 basic_block def_bb;
1167 struct loop *loop = dta->ivopts_data->current_loop;
1169 if (TREE_CODE (*idx) != SSA_NAME)
1170 return true;
1172 iv = get_iv (dta->ivopts_data, *idx);
1173 if (!iv)
1174 return false;
1176 *idx = iv->base;
1178 if (!iv->step)
1179 return true;
1181 iv_type = TREE_TYPE (iv->base);
1182 type = build_pointer_type (TREE_TYPE (base));
1183 if (TREE_CODE (base) == ARRAY_REF)
1185 step = array_ref_element_size (base);
1186 lbound = array_ref_low_bound (base);
1188 /* We only handle addresses whose step is an integer constant. */
1189 if (TREE_CODE (step) != INTEGER_CST)
1190 return false;
1192 /* We need the lower bound to be invariant in loop, since otherwise
1193 we are unable to initialize a new induction variable created
1194 in strength reduction -- we need to take the address of the
1195 reference in front of the loop. */
1196 if (is_gimple_min_invariant (lbound))
1197 ; /* Nothing to do. */
1198 else if (TREE_CODE (lbound) != SSA_NAME)
1199 return false;
1200 else
1202 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (lbound));
1203 if (def_bb
1204 && flow_bb_inside_loop_p (loop, def_bb))
1205 return false;
1208 else
1209 /* The step for pointer arithmetics already is 1 byte. */
1210 step = build_int_cst (type, 1);
1212 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1213 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1214 type, iv->base, iv->step, dta->stmt);
1215 else
1216 iv_step = fold_convert (iv_type, iv->step);
1218 if (!iv_step)
1220 /* The index might wrap. */
1221 return false;
1224 step = EXEC_BINARY (MULT_EXPR, type, step, iv_step);
1226 if (!*dta->step_p)
1227 *dta->step_p = step;
1228 else
1229 *dta->step_p = EXEC_BINARY (PLUS_EXPR, type, *dta->step_p, step);
1231 return true;
1234 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1235 object is passed to it in DATA. */
1237 static bool
1238 idx_record_use (tree base, tree *idx,
1239 void *data)
1241 find_interesting_uses_op (data, *idx);
1242 if (TREE_CODE (base) == ARRAY_REF)
1244 find_interesting_uses_op (data, array_ref_element_size (base));
1245 find_interesting_uses_op (data, array_ref_low_bound (base));
1247 return true;
1250 /* Finds addresses in *OP_P inside STMT. */
1252 static void
1253 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1255 tree base = unshare_expr (*op_p), step = NULL;
1256 struct iv *civ;
1257 struct ifs_ivopts_data ifs_ivopts_data;
1259 /* Ignore bitfields for now. Not really something terribly complicated
1260 to handle. TODO. */
1261 if (TREE_CODE (base) == COMPONENT_REF
1262 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1263 goto fail;
1265 ifs_ivopts_data.ivopts_data = data;
1266 ifs_ivopts_data.stmt = stmt;
1267 ifs_ivopts_data.step_p = &step;
1268 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1269 || zero_p (step))
1270 goto fail;
1272 if (TREE_CODE (base) == INDIRECT_REF)
1273 base = TREE_OPERAND (base, 0);
1274 else
1275 base = build_addr (base);
1277 civ = alloc_iv (base, step);
1278 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1279 return;
1281 fail:
1282 for_each_index (op_p, idx_record_use, data);
1285 /* Finds and records invariants used in STMT. */
1287 static void
1288 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1290 use_optype uses = NULL;
1291 unsigned i, n;
1292 tree op;
1294 if (TREE_CODE (stmt) == PHI_NODE)
1295 n = PHI_NUM_ARGS (stmt);
1296 else
1298 get_stmt_operands (stmt);
1299 uses = STMT_USE_OPS (stmt);
1300 n = NUM_USES (uses);
1303 for (i = 0; i < n; i++)
1305 if (TREE_CODE (stmt) == PHI_NODE)
1306 op = PHI_ARG_DEF (stmt, i);
1307 else
1308 op = USE_OP (uses, i);
1310 record_invariant (data, op, false);
1314 /* Finds interesting uses of induction variables in the statement STMT. */
1316 static void
1317 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1319 struct iv *iv;
1320 tree op, lhs, rhs;
1321 use_optype uses = NULL;
1322 unsigned i, n;
1324 find_invariants_stmt (data, stmt);
1326 if (TREE_CODE (stmt) == COND_EXPR)
1328 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1329 return;
1332 if (TREE_CODE (stmt) == MODIFY_EXPR)
1334 lhs = TREE_OPERAND (stmt, 0);
1335 rhs = TREE_OPERAND (stmt, 1);
1337 if (TREE_CODE (lhs) == SSA_NAME)
1339 /* If the statement defines an induction variable, the uses are not
1340 interesting by themselves. */
1342 iv = get_iv (data, lhs);
1344 if (iv && !zero_p (iv->step))
1345 return;
1348 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1350 case tcc_comparison:
1351 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1352 return;
1354 case tcc_reference:
1355 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1356 if (REFERENCE_CLASS_P (lhs))
1357 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1358 return;
1360 default: ;
1363 if (REFERENCE_CLASS_P (lhs)
1364 && is_gimple_val (rhs))
1366 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1367 find_interesting_uses_op (data, rhs);
1368 return;
1371 /* TODO -- we should also handle address uses of type
1373 memory = call (whatever);
1377 call (memory). */
1380 if (TREE_CODE (stmt) == PHI_NODE
1381 && bb_for_stmt (stmt) == data->current_loop->header)
1383 lhs = PHI_RESULT (stmt);
1384 iv = get_iv (data, lhs);
1386 if (iv && !zero_p (iv->step))
1387 return;
1390 if (TREE_CODE (stmt) == PHI_NODE)
1391 n = PHI_NUM_ARGS (stmt);
1392 else
1394 uses = STMT_USE_OPS (stmt);
1395 n = NUM_USES (uses);
1398 for (i = 0; i < n; i++)
1400 if (TREE_CODE (stmt) == PHI_NODE)
1401 op = PHI_ARG_DEF (stmt, i);
1402 else
1403 op = USE_OP (uses, i);
1405 if (TREE_CODE (op) != SSA_NAME)
1406 continue;
1408 iv = get_iv (data, op);
1409 if (!iv)
1410 continue;
1412 find_interesting_uses_op (data, op);
1416 /* Finds interesting uses of induction variables outside of loops
1417 on loop exit edge EXIT. */
1419 static void
1420 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1422 tree phi, def;
1424 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
1426 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1427 find_interesting_uses_outer (data, def);
1431 /* Finds uses of the induction variables that are interesting. */
1433 static void
1434 find_interesting_uses (struct ivopts_data *data)
1436 basic_block bb;
1437 block_stmt_iterator bsi;
1438 tree phi;
1439 basic_block *body = get_loop_body (data->current_loop);
1440 unsigned i;
1441 struct version_info *info;
1442 edge e;
1444 if (dump_file && (dump_flags & TDF_DETAILS))
1445 fprintf (dump_file, "Uses:\n\n");
1447 for (i = 0; i < data->current_loop->num_nodes; i++)
1449 bb = body[i];
1451 for (e = bb->succ; e; e = e->succ_next)
1452 if (e->dest != EXIT_BLOCK_PTR
1453 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1454 find_interesting_uses_outside (data, e);
1456 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
1457 find_interesting_uses_stmt (data, phi);
1458 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1459 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1462 if (dump_file && (dump_flags & TDF_DETAILS))
1464 fprintf (dump_file, "\n");
1466 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
1468 info = ver_info (data, i);
1469 if (info->inv_id)
1471 fprintf (dump_file, " ");
1472 print_generic_expr (dump_file, info->name, TDF_SLIM);
1473 fprintf (dump_file, " is invariant (%d)%s\n",
1474 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1478 fprintf (dump_file, "\n");
1481 free (body);
1484 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1485 position to POS. If USE is not NULL, the candidate is set as related to
1486 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1487 replacement of the final value of the iv by a direct computation. */
1489 static struct iv_cand *
1490 add_candidate_1 (struct ivopts_data *data,
1491 tree base, tree step, bool important, enum iv_position pos,
1492 struct iv_use *use, tree incremented_at)
1494 unsigned i;
1495 struct iv_cand *cand = NULL;
1496 tree type;
1498 if (base)
1500 type = TREE_TYPE (base);
1501 if (!TYPE_UNSIGNED (type))
1503 type = unsigned_type_for (type);
1504 base = fold_convert (type, base);
1505 if (step)
1506 step = fold_convert (type, step);
1510 for (i = 0; i < n_iv_cands (data); i++)
1512 cand = iv_cand (data, i);
1514 if (cand->pos != pos)
1515 continue;
1517 if (cand->incremented_at != incremented_at)
1518 continue;
1520 if (!cand->iv)
1522 if (!base && !step)
1523 break;
1525 continue;
1528 if (!base && !step)
1529 continue;
1531 if (!operand_equal_p (base, cand->iv->base, 0))
1532 continue;
1534 if (zero_p (cand->iv->step))
1536 if (zero_p (step))
1537 break;
1539 else
1541 if (step && operand_equal_p (step, cand->iv->step, 0))
1542 break;
1546 if (i == n_iv_cands (data))
1548 cand = xcalloc (1, sizeof (struct iv_cand));
1549 cand->id = i;
1551 if (!base && !step)
1552 cand->iv = NULL;
1553 else
1554 cand->iv = alloc_iv (base, step);
1556 cand->pos = pos;
1557 if (pos != IP_ORIGINAL && cand->iv)
1559 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1560 cand->var_after = cand->var_before;
1562 cand->important = important;
1563 cand->incremented_at = incremented_at;
1564 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1566 if (dump_file && (dump_flags & TDF_DETAILS))
1567 dump_cand (dump_file, cand);
1570 if (important && !cand->important)
1572 cand->important = true;
1573 if (dump_file && (dump_flags & TDF_DETAILS))
1574 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1577 if (use)
1579 bitmap_set_bit (use->related_cands, i);
1580 if (dump_file && (dump_flags & TDF_DETAILS))
1581 fprintf (dump_file, "Candidate %d is related to use %d\n",
1582 cand->id, use->id);
1585 return cand;
1588 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1589 position to POS. If USE is not NULL, the candidate is set as related to
1590 it. The candidate computation is scheduled on all available positions. */
1592 static void
1593 add_candidate (struct ivopts_data *data,
1594 tree base, tree step, bool important, struct iv_use *use)
1596 if (ip_normal_pos (data->current_loop))
1597 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
1598 if (ip_end_pos (data->current_loop))
1599 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
1602 /* Adds standard iv candidates. */
1604 static void
1605 add_standard_iv_candidates (struct ivopts_data *data)
1607 /* Add 0 + 1 * iteration candidate. */
1608 add_candidate (data,
1609 build_int_cst (unsigned_intSI_type_node, 0),
1610 build_int_cst (unsigned_intSI_type_node, 1),
1611 true, NULL);
1613 /* The same for a long type if it is still fast enough. */
1614 if (BITS_PER_WORD > 32)
1615 add_candidate (data,
1616 build_int_cst (unsigned_intDI_type_node, 0),
1617 build_int_cst (unsigned_intDI_type_node, 1),
1618 true, NULL);
1622 /* Adds candidates bases on the old induction variable IV. */
1624 static void
1625 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
1627 tree phi, def;
1628 struct iv_cand *cand;
1630 add_candidate (data, iv->base, iv->step, true, NULL);
1632 /* The same, but with initial value zero. */
1633 add_candidate (data,
1634 build_int_cst (TREE_TYPE (iv->base), 0),
1635 iv->step, true, NULL);
1637 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
1638 if (TREE_CODE (phi) == PHI_NODE)
1640 /* Additionally record the possibility of leaving the original iv
1641 untouched. */
1642 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
1643 cand = add_candidate_1 (data,
1644 iv->base, iv->step, true, IP_ORIGINAL, NULL,
1645 SSA_NAME_DEF_STMT (def));
1646 cand->var_before = iv->ssa_name;
1647 cand->var_after = def;
1651 /* Adds candidates based on the old induction variables. */
1653 static void
1654 add_old_ivs_candidates (struct ivopts_data *data)
1656 unsigned i;
1657 struct iv *iv;
1659 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
1661 iv = ver_info (data, i)->iv;
1662 if (iv && iv->biv_p && !zero_p (iv->step))
1663 add_old_iv_candidates (data, iv);
1667 /* Adds candidates based on the value of the induction variable IV and USE. */
1669 static void
1670 add_iv_value_candidates (struct ivopts_data *data,
1671 struct iv *iv, struct iv_use *use)
1673 add_candidate (data, iv->base, iv->step, false, use);
1675 /* The same, but with initial value zero. */
1676 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
1677 iv->step, false, use);
1680 /* Adds candidates based on the address IV and USE. */
1682 static void
1683 add_address_candidates (struct ivopts_data *data,
1684 struct iv *iv, struct iv_use *use)
1686 tree base, abase, tmp, *act;
1688 /* First, the trivial choices. */
1689 add_iv_value_candidates (data, iv, use);
1691 /* Second, try removing the COMPONENT_REFs. */
1692 if (TREE_CODE (iv->base) == ADDR_EXPR)
1694 base = TREE_OPERAND (iv->base, 0);
1695 while (TREE_CODE (base) == COMPONENT_REF
1696 || (TREE_CODE (base) == ARRAY_REF
1697 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
1698 base = TREE_OPERAND (base, 0);
1700 if (base != TREE_OPERAND (iv->base, 0))
1702 if (TREE_CODE (base) == INDIRECT_REF)
1703 base = TREE_OPERAND (base, 0);
1704 else
1705 base = build_addr (base);
1706 add_candidate (data, base, iv->step, false, use);
1710 /* Third, try removing the constant offset. */
1711 abase = iv->base;
1712 while (TREE_CODE (abase) == PLUS_EXPR
1713 && TREE_CODE (TREE_OPERAND (abase, 1)) != INTEGER_CST)
1714 abase = TREE_OPERAND (abase, 0);
1715 /* We found the offset, so make the copy of the non-shared part and
1716 remove it. */
1717 if (TREE_CODE (abase) == PLUS_EXPR)
1719 tmp = iv->base;
1720 act = &base;
1722 for (tmp = iv->base; tmp != abase; tmp = TREE_OPERAND (tmp, 0))
1724 *act = build2 (PLUS_EXPR, TREE_TYPE (tmp),
1725 NULL_TREE, TREE_OPERAND (tmp, 1));
1726 act = &TREE_OPERAND (*act, 0);
1728 *act = TREE_OPERAND (tmp, 0);
1730 add_candidate (data, base, iv->step, false, use);
1734 /* Possibly adds pseudocandidate for replacing the final value of USE by
1735 a direct computation. */
1737 static void
1738 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
1740 struct tree_niter_desc *niter;
1741 struct loop *loop = data->current_loop;
1743 /* We must know where we exit the loop and how many times does it roll. */
1744 if (!single_dom_exit (loop))
1745 return;
1747 niter = &loop_data (loop)->niter;
1748 if (!niter->niter
1749 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
1750 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
1751 return;
1753 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
1756 /* Adds candidates based on the uses. */
1758 static void
1759 add_derived_ivs_candidates (struct ivopts_data *data)
1761 unsigned i;
1763 for (i = 0; i < n_iv_uses (data); i++)
1765 struct iv_use *use = iv_use (data, i);
1767 if (!use)
1768 continue;
1770 switch (use->type)
1772 case USE_NONLINEAR_EXPR:
1773 case USE_COMPARE:
1774 /* Just add the ivs based on the value of the iv used here. */
1775 add_iv_value_candidates (data, use->iv, use);
1776 break;
1778 case USE_OUTER:
1779 add_iv_value_candidates (data, use->iv, use);
1781 /* Additionally, add the pseudocandidate for the possibility to
1782 replace the final value by a direct computation. */
1783 add_iv_outer_candidates (data, use);
1784 break;
1786 case USE_ADDRESS:
1787 add_address_candidates (data, use->iv, use);
1788 break;
1790 default:
1791 gcc_unreachable ();
1796 /* Finds the candidates for the induction variables. */
1798 static void
1799 find_iv_candidates (struct ivopts_data *data)
1801 /* Add commonly used ivs. */
1802 add_standard_iv_candidates (data);
1804 /* Add old induction variables. */
1805 add_old_ivs_candidates (data);
1807 /* Add induction variables derived from uses. */
1808 add_derived_ivs_candidates (data);
1811 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
1812 If consider_all_candidates is true, we use a two-dimensional array, otherwise
1813 we allocate a simple list to every use. */
1815 static void
1816 alloc_use_cost_map (struct ivopts_data *data)
1818 unsigned i, n_imp = 0, size, j;
1820 if (!data->consider_all_candidates)
1822 for (i = 0; i < n_iv_cands (data); i++)
1824 struct iv_cand *cand = iv_cand (data, i);
1825 if (cand->important)
1826 n_imp++;
1830 for (i = 0; i < n_iv_uses (data); i++)
1832 struct iv_use *use = iv_use (data, i);
1834 if (data->consider_all_candidates)
1836 size = n_iv_cands (data);
1837 use->n_map_members = size;
1839 else
1841 size = n_imp;
1842 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, size++);
1843 use->n_map_members = 0;
1846 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
1850 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
1851 on invariants DEPENDS_ON. */
1853 static void
1854 set_use_iv_cost (struct ivopts_data *data,
1855 struct iv_use *use, struct iv_cand *cand, unsigned cost,
1856 bitmap depends_on)
1858 if (cost == INFTY
1859 && depends_on)
1861 BITMAP_XFREE (depends_on);
1862 depends_on = NULL;
1865 if (data->consider_all_candidates)
1867 use->cost_map[cand->id].cand = cand;
1868 use->cost_map[cand->id].cost = cost;
1869 use->cost_map[cand->id].depends_on = depends_on;
1870 return;
1873 if (cost == INFTY)
1874 return;
1876 use->cost_map[use->n_map_members].cand = cand;
1877 use->cost_map[use->n_map_members].cost = cost;
1878 use->cost_map[use->n_map_members].depends_on = depends_on;
1879 use->n_map_members++;
1882 /* Gets cost of (USE, CANDIDATE) pair. Stores the bitmap of dependencies to
1883 DEPENDS_ON. */
1885 static unsigned
1886 get_use_iv_cost (struct ivopts_data *data,
1887 struct iv_use *use, struct iv_cand *cand, bitmap *depends_on)
1889 unsigned i;
1891 if (!cand)
1892 return INFTY;
1894 if (data->consider_all_candidates)
1895 i = cand->id;
1896 else
1898 for (i = 0; i < use->n_map_members; i++)
1899 if (use->cost_map[i].cand == cand)
1900 break;
1902 if (i == use->n_map_members)
1903 return INFTY;
1906 if (depends_on)
1907 *depends_on = use->cost_map[i].depends_on;
1908 return use->cost_map[i].cost;
1911 /* Returns estimate on cost of computing SEQ. */
1913 static unsigned
1914 seq_cost (rtx seq)
1916 unsigned cost = 0;
1917 rtx set;
1919 for (; seq; seq = NEXT_INSN (seq))
1921 set = single_set (seq);
1922 if (set)
1923 cost += rtx_cost (set, SET);
1924 else
1925 cost++;
1928 return cost;
1931 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
1932 static rtx
1933 produce_memory_decl_rtl (tree obj, int *regno)
1935 rtx x;
1936 if (!obj)
1937 abort ();
1938 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
1940 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
1941 x = gen_rtx_SYMBOL_REF (Pmode, name);
1943 else
1944 x = gen_raw_REG (Pmode, (*regno)++);
1946 return gen_rtx_MEM (DECL_MODE (obj), x);
1949 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
1950 walk_tree. DATA contains the actual fake register number. */
1952 static tree
1953 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
1955 tree obj = NULL_TREE;
1956 rtx x = NULL_RTX;
1957 int *regno = data;
1959 switch (TREE_CODE (*expr_p))
1961 case ADDR_EXPR:
1962 for (expr_p = &TREE_OPERAND (*expr_p, 0);
1963 (handled_component_p (*expr_p)
1964 || TREE_CODE (*expr_p) == REALPART_EXPR
1965 || TREE_CODE (*expr_p) == IMAGPART_EXPR);
1966 expr_p = &TREE_OPERAND (*expr_p, 0));
1967 obj = *expr_p;
1968 if (DECL_P (obj))
1969 x = produce_memory_decl_rtl (obj, regno);
1970 break;
1972 case SSA_NAME:
1973 *ws = 0;
1974 obj = SSA_NAME_VAR (*expr_p);
1975 if (!DECL_RTL_SET_P (obj))
1976 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
1977 break;
1979 case VAR_DECL:
1980 case PARM_DECL:
1981 case RESULT_DECL:
1982 *ws = 0;
1983 obj = *expr_p;
1985 if (DECL_RTL_SET_P (obj))
1986 break;
1988 if (DECL_MODE (obj) == BLKmode)
1989 x = produce_memory_decl_rtl (obj, regno);
1990 else
1991 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
1993 break;
1995 default:
1996 break;
1999 if (x)
2001 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
2002 SET_DECL_RTL (obj, x);
2005 return NULL_TREE;
2008 /* Determines cost of the computation of EXPR. */
2010 static unsigned
2011 computation_cost (tree expr)
2013 rtx seq, rslt;
2014 tree type = TREE_TYPE (expr);
2015 unsigned cost;
2016 int regno = 0;
2018 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2019 start_sequence ();
2020 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2021 seq = get_insns ();
2022 end_sequence ();
2024 cost = seq_cost (seq);
2025 if (GET_CODE (rslt) == MEM)
2026 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2028 return cost;
2031 /* Returns variable containing the value of candidate CAND at statement AT. */
2033 static tree
2034 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2036 if (stmt_after_increment (loop, cand, stmt))
2037 return cand->var_after;
2038 else
2039 return cand->var_before;
2042 /* Determines the expression by that USE is expressed from induction variable
2043 CAND at statement AT in LOOP. */
2045 static tree
2046 get_computation_at (struct loop *loop,
2047 struct iv_use *use, struct iv_cand *cand, tree at)
2049 tree ubase = use->iv->base;
2050 tree ustep = use->iv->step;
2051 tree cbase = cand->iv->base;
2052 tree cstep = cand->iv->step;
2053 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2054 tree uutype;
2055 tree expr, delta;
2056 tree ratio;
2057 unsigned HOST_WIDE_INT ustepi, cstepi;
2058 HOST_WIDE_INT ratioi;
2060 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2062 /* We do not have a precision to express the values of use. */
2063 return NULL_TREE;
2066 expr = var_at_stmt (loop, cand, at);
2068 if (TREE_TYPE (expr) != ctype)
2070 /* This may happen with the original ivs. */
2071 expr = fold_convert (ctype, expr);
2074 if (TYPE_UNSIGNED (utype))
2075 uutype = utype;
2076 else
2078 uutype = unsigned_type_for (utype);
2079 ubase = fold_convert (uutype, ubase);
2080 ustep = fold_convert (uutype, ustep);
2083 if (uutype != ctype)
2085 expr = fold_convert (uutype, expr);
2086 cbase = fold_convert (uutype, cbase);
2087 cstep = fold_convert (uutype, cstep);
2090 if (!cst_and_fits_in_hwi (cstep)
2091 || !cst_and_fits_in_hwi (ustep))
2092 return NULL_TREE;
2094 ustepi = int_cst_value (ustep);
2095 cstepi = int_cst_value (cstep);
2097 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2099 /* TODO maybe consider case when ustep divides cstep and the ratio is
2100 a power of 2 (so that the division is fast to execute)? We would
2101 need to be much more careful with overflows etc. then. */
2102 return NULL_TREE;
2105 /* We may need to shift the value if we are after the increment. */
2106 if (stmt_after_increment (loop, cand, at))
2107 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2109 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2110 or |ratio| == 1, it is better to handle this like
2112 ubase - ratio * cbase + ratio * var. */
2114 if (ratioi == 1)
2116 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2117 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2119 else if (ratioi == -1)
2121 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2122 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2124 else if (TREE_CODE (cbase) == INTEGER_CST)
2126 ratio = build_int_cst_type (uutype, ratioi);
2127 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2128 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2129 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2130 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2132 else
2134 expr = fold (build2 (MINUS_EXPR, uutype, expr, cbase));
2135 ratio = build_int_cst_type (uutype, ratioi);
2136 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2137 expr = fold (build2 (PLUS_EXPR, uutype, ubase, expr));
2140 return fold_convert (utype, expr);
2143 /* Determines the expression by that USE is expressed from induction variable
2144 CAND in LOOP. */
2146 static tree
2147 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2149 return get_computation_at (loop, use, cand, use->stmt);
2152 /* Strips constant offsets from EXPR and adds them to OFFSET. */
2154 static void
2155 strip_offset (tree *expr, unsigned HOST_WIDE_INT *offset)
2157 tree op0, op1;
2158 enum tree_code code;
2160 while (1)
2162 if (cst_and_fits_in_hwi (*expr))
2164 *offset += int_cst_value (*expr);
2165 *expr = integer_zero_node;
2166 return;
2169 code = TREE_CODE (*expr);
2171 if (code != PLUS_EXPR && code != MINUS_EXPR)
2172 return;
2174 op0 = TREE_OPERAND (*expr, 0);
2175 op1 = TREE_OPERAND (*expr, 1);
2177 if (cst_and_fits_in_hwi (op1))
2179 if (code == PLUS_EXPR)
2180 *offset += int_cst_value (op1);
2181 else
2182 *offset -= int_cst_value (op1);
2184 *expr = op0;
2185 continue;
2188 if (code != PLUS_EXPR)
2189 return;
2191 if (!cst_and_fits_in_hwi (op0))
2192 return;
2194 *offset += int_cst_value (op0);
2195 *expr = op1;
2199 /* Returns cost of addition in MODE. */
2201 static unsigned
2202 add_cost (enum machine_mode mode)
2204 static unsigned costs[NUM_MACHINE_MODES];
2205 rtx seq;
2206 unsigned cost;
2208 if (costs[mode])
2209 return costs[mode];
2211 start_sequence ();
2212 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2213 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2214 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2215 NULL_RTX);
2216 seq = get_insns ();
2217 end_sequence ();
2219 cost = seq_cost (seq);
2220 if (!cost)
2221 cost = 1;
2223 costs[mode] = cost;
2225 if (dump_file && (dump_flags & TDF_DETAILS))
2226 fprintf (dump_file, "Addition in %s costs %d\n",
2227 GET_MODE_NAME (mode), cost);
2228 return cost;
2231 /* Entry in a hashtable of already known costs for multiplication. */
2232 struct mbc_entry
2234 HOST_WIDE_INT cst; /* The constant to multiply by. */
2235 enum machine_mode mode; /* In mode. */
2236 unsigned cost; /* The cost. */
2239 /* Counts hash value for the ENTRY. */
2241 static hashval_t
2242 mbc_entry_hash (const void *entry)
2244 const struct mbc_entry *e = entry;
2246 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2249 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2251 static int
2252 mbc_entry_eq (const void *entry1, const void *entry2)
2254 const struct mbc_entry *e1 = entry1;
2255 const struct mbc_entry *e2 = entry2;
2257 return (e1->mode == e2->mode
2258 && e1->cst == e2->cst);
2261 /* Returns cost of multiplication by constant CST in MODE. */
2263 static unsigned
2264 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2266 static htab_t costs;
2267 struct mbc_entry **cached, act;
2268 rtx seq;
2269 unsigned cost;
2271 if (!costs)
2272 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2274 act.mode = mode;
2275 act.cst = cst;
2276 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2277 if (*cached)
2278 return (*cached)->cost;
2280 *cached = xmalloc (sizeof (struct mbc_entry));
2281 (*cached)->mode = mode;
2282 (*cached)->cst = cst;
2284 start_sequence ();
2285 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2286 NULL_RTX, 0);
2287 seq = get_insns ();
2288 end_sequence ();
2290 cost = seq_cost (seq);
2292 if (dump_file && (dump_flags & TDF_DETAILS))
2293 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2294 (int) cst, GET_MODE_NAME (mode), cost);
2296 (*cached)->cost = cost;
2298 return cost;
2301 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2302 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2303 variable is omitted. The created memory accesses MODE.
2305 TODO -- there must be some better way. This all is quite crude. */
2307 static unsigned
2308 get_address_cost (bool symbol_present, bool var_present,
2309 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2311 #define MAX_RATIO 128
2312 static sbitmap valid_mult;
2313 static HOST_WIDE_INT rat, off;
2314 static HOST_WIDE_INT min_offset, max_offset;
2315 static unsigned costs[2][2][2][2];
2316 unsigned cost, acost;
2317 rtx seq, addr, base;
2318 bool offset_p, ratio_p;
2319 rtx reg1;
2320 HOST_WIDE_INT s_offset;
2321 unsigned HOST_WIDE_INT mask;
2322 unsigned bits;
2324 if (!valid_mult)
2326 HOST_WIDE_INT i;
2328 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2330 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2331 for (i = 1; i <= 1 << 20; i <<= 1)
2333 XEXP (addr, 1) = GEN_INT (i);
2334 if (!memory_address_p (Pmode, addr))
2335 break;
2337 max_offset = i >> 1;
2338 off = max_offset;
2340 for (i = 1; i <= 1 << 20; i <<= 1)
2342 XEXP (addr, 1) = GEN_INT (-i);
2343 if (!memory_address_p (Pmode, addr))
2344 break;
2346 min_offset = -(i >> 1);
2348 if (dump_file && (dump_flags & TDF_DETAILS))
2350 fprintf (dump_file, "get_address_cost:\n");
2351 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2352 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2355 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2356 sbitmap_zero (valid_mult);
2357 rat = 1;
2358 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2359 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2361 XEXP (addr, 1) = GEN_INT (i);
2362 if (memory_address_p (Pmode, addr))
2364 SET_BIT (valid_mult, i + MAX_RATIO);
2365 rat = i;
2369 if (dump_file && (dump_flags & TDF_DETAILS))
2371 fprintf (dump_file, " allowed multipliers:");
2372 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2373 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2374 fprintf (dump_file, " %d", (int) i);
2375 fprintf (dump_file, "\n");
2376 fprintf (dump_file, "\n");
2380 bits = GET_MODE_BITSIZE (Pmode);
2381 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2382 offset &= mask;
2383 if ((offset >> (bits - 1) & 1))
2384 offset |= ~mask;
2385 s_offset = offset;
2387 cost = 0;
2388 offset_p = (min_offset <= s_offset && s_offset <= max_offset);
2389 ratio_p = (ratio != 1
2390 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2391 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2393 if (ratio != 1 && !ratio_p)
2394 cost += multiply_by_cost (ratio, Pmode);
2396 if (s_offset && !offset_p && !symbol_present)
2398 cost += add_cost (Pmode);
2399 var_present = true;
2402 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2403 if (!acost)
2405 acost = 0;
2407 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2408 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2409 if (ratio_p)
2410 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2412 if (symbol_present)
2414 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2415 if (offset_p)
2416 base = gen_rtx_fmt_e (CONST, Pmode,
2417 gen_rtx_fmt_ee (PLUS, Pmode,
2418 base,
2419 GEN_INT (off)));
2420 if (var_present)
2421 base = gen_rtx_fmt_ee (PLUS, Pmode, reg1, base);
2424 else if (var_present)
2426 base = reg1;
2427 if (offset_p)
2428 base = gen_rtx_fmt_ee (PLUS, Pmode, base, GEN_INT (off));
2430 else if (offset_p)
2431 base = GEN_INT (off);
2432 else
2433 base = NULL_RTX;
2435 if (base)
2436 addr = gen_rtx_fmt_ee (PLUS, Pmode, base, addr);
2438 start_sequence ();
2439 addr = memory_address (Pmode, addr);
2440 seq = get_insns ();
2441 end_sequence ();
2443 acost = seq_cost (seq);
2444 acost += address_cost (addr, Pmode);
2446 if (!acost)
2447 acost = 1;
2448 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2451 return cost + acost;
2454 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2455 the bitmap to that we should store it. */
2457 static struct ivopts_data *fd_ivopts_data;
2458 static tree
2459 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2461 bitmap *depends_on = data;
2462 struct version_info *info;
2464 if (TREE_CODE (*expr_p) != SSA_NAME)
2465 return NULL_TREE;
2466 info = name_info (fd_ivopts_data, *expr_p);
2468 if (!info->inv_id || info->has_nonlin_use)
2469 return NULL_TREE;
2471 if (!*depends_on)
2472 *depends_on = BITMAP_XMALLOC ();
2473 bitmap_set_bit (*depends_on, info->inv_id);
2475 return NULL_TREE;
2478 /* Estimates cost of forcing EXPR into variable. DEPENDS_ON is a set of the
2479 invariants the computation depends on. */
2481 static unsigned
2482 force_var_cost (struct ivopts_data *data,
2483 tree expr, bitmap *depends_on)
2485 static bool costs_initialized = false;
2486 static unsigned integer_cost;
2487 static unsigned symbol_cost;
2488 static unsigned address_cost;
2490 if (!costs_initialized)
2492 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2493 rtx x = gen_rtx_MEM (DECL_MODE (var),
2494 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2495 tree addr;
2496 tree type = build_pointer_type (integer_type_node);
2498 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2499 2000));
2501 SET_DECL_RTL (var, x);
2502 TREE_STATIC (var) = 1;
2503 addr = build1 (ADDR_EXPR, type, var);
2504 symbol_cost = computation_cost (addr) + 1;
2506 address_cost
2507 = computation_cost (build2 (PLUS_EXPR, type,
2508 addr,
2509 build_int_cst_type (type, 2000))) + 1;
2510 if (dump_file && (dump_flags & TDF_DETAILS))
2512 fprintf (dump_file, "force_var_cost:\n");
2513 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2514 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2515 fprintf (dump_file, " address %d\n", (int) address_cost);
2516 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2517 fprintf (dump_file, "\n");
2520 costs_initialized = true;
2523 if (depends_on)
2525 fd_ivopts_data = data;
2526 walk_tree (&expr, find_depends, depends_on, NULL);
2529 if (SSA_VAR_P (expr))
2530 return 0;
2532 if (TREE_INVARIANT (expr))
2534 if (TREE_CODE (expr) == INTEGER_CST)
2535 return integer_cost;
2537 if (TREE_CODE (expr) == ADDR_EXPR)
2539 tree obj = TREE_OPERAND (expr, 0);
2541 if (TREE_CODE (obj) == VAR_DECL
2542 || TREE_CODE (obj) == PARM_DECL
2543 || TREE_CODE (obj) == RESULT_DECL)
2544 return symbol_cost;
2547 return address_cost;
2550 /* Just an arbitrary value, FIXME. */
2551 return target_spill_cost;
2554 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2555 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2556 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2557 invariants the computation depends on. */
2559 static unsigned
2560 split_address_cost (struct ivopts_data *data,
2561 tree addr, bool *symbol_present, bool *var_present,
2562 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2564 tree core;
2565 HOST_WIDE_INT bitsize;
2566 HOST_WIDE_INT bitpos;
2567 tree toffset;
2568 enum machine_mode mode;
2569 int unsignedp, volatilep;
2571 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
2572 &unsignedp, &volatilep);
2574 if (toffset != 0
2575 || bitpos % BITS_PER_UNIT != 0
2576 || TREE_CODE (core) != VAR_DECL)
2578 *symbol_present = false;
2579 *var_present = true;
2580 fd_ivopts_data = data;
2581 walk_tree (&addr, find_depends, depends_on, NULL);
2582 return target_spill_cost;
2585 *offset += bitpos / BITS_PER_UNIT;
2586 if (TREE_STATIC (core)
2587 || DECL_EXTERNAL (core))
2589 *symbol_present = true;
2590 *var_present = false;
2591 return 0;
2594 *symbol_present = false;
2595 *var_present = true;
2596 return 0;
2599 /* Estimates cost of expressing difference of addresses E1 - E2 as
2600 var + symbol + offset. The value of offset is added to OFFSET,
2601 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2602 part is missing. DEPENDS_ON is a set of the invariants the computation
2603 depends on. */
2605 static unsigned
2606 ptr_difference_cost (struct ivopts_data *data,
2607 tree e1, tree e2, bool *symbol_present, bool *var_present,
2608 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2610 HOST_WIDE_INT diff = 0;
2611 unsigned cost;
2613 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
2615 if (TREE_CODE (e2) == ADDR_EXPR
2616 && ptr_difference_const (TREE_OPERAND (e1, 0),
2617 TREE_OPERAND (e2, 0), &diff))
2619 *offset += diff;
2620 *symbol_present = false;
2621 *var_present = false;
2622 return 0;
2625 if (e2 == integer_zero_node)
2626 return split_address_cost (data, TREE_OPERAND (e1, 0),
2627 symbol_present, var_present, offset, depends_on);
2629 *symbol_present = false;
2630 *var_present = true;
2632 cost = force_var_cost (data, e1, depends_on);
2633 cost += force_var_cost (data, e2, depends_on);
2634 cost += add_cost (Pmode);
2636 return cost;
2639 /* Estimates cost of expressing difference E1 - E2 as
2640 var + symbol + offset. The value of offset is added to OFFSET,
2641 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2642 part is missing. DEPENDS_ON is a set of the invariants the computation
2643 depends on. */
2645 static unsigned
2646 difference_cost (struct ivopts_data *data,
2647 tree e1, tree e2, bool *symbol_present, bool *var_present,
2648 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2650 unsigned cost;
2651 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
2653 strip_offset (&e1, offset);
2654 *offset = -*offset;
2655 strip_offset (&e2, offset);
2656 *offset = -*offset;
2658 if (TREE_CODE (e1) == ADDR_EXPR)
2659 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
2660 depends_on);
2661 *symbol_present = false;
2663 if (operand_equal_p (e1, e2, 0))
2665 *var_present = false;
2666 return 0;
2668 *var_present = true;
2669 if (zero_p (e2))
2670 return force_var_cost (data, e1, depends_on);
2672 if (zero_p (e1))
2674 cost = force_var_cost (data, e2, depends_on);
2675 cost += multiply_by_cost (-1, mode);
2677 return cost;
2680 cost = force_var_cost (data, e1, depends_on);
2681 cost += force_var_cost (data, e2, depends_on);
2682 cost += add_cost (mode);
2684 return cost;
2687 /* Determines the cost of the computation by that USE is expressed
2688 from induction variable CAND. If ADDRESS_P is true, we just need
2689 to create an address from it, otherwise we want to get it into
2690 register. A set of invariants we depend on is stored in
2691 DEPENDS_ON. AT is the statement at that the value is computed. */
2693 static unsigned
2694 get_computation_cost_at (struct ivopts_data *data,
2695 struct iv_use *use, struct iv_cand *cand,
2696 bool address_p, bitmap *depends_on, tree at)
2698 tree ubase = use->iv->base, ustep = use->iv->step;
2699 tree cbase, cstep;
2700 tree utype = TREE_TYPE (ubase), ctype;
2701 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
2702 HOST_WIDE_INT ratio, aratio;
2703 bool var_present, symbol_present;
2704 unsigned cost = 0, n_sums;
2706 *depends_on = NULL;
2708 /* Only consider real candidates. */
2709 if (!cand->iv)
2710 return INFTY;
2712 cbase = cand->iv->base;
2713 cstep = cand->iv->step;
2714 ctype = TREE_TYPE (cbase);
2716 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2718 /* We do not have a precision to express the values of use. */
2719 return INFTY;
2722 if (!cst_and_fits_in_hwi (ustep)
2723 || !cst_and_fits_in_hwi (cstep))
2724 return INFTY;
2726 if (TREE_CODE (ubase) == INTEGER_CST
2727 && !cst_and_fits_in_hwi (ubase))
2728 goto fallback;
2730 if (TREE_CODE (cbase) == INTEGER_CST
2731 && !cst_and_fits_in_hwi (cbase))
2732 goto fallback;
2734 ustepi = int_cst_value (ustep);
2735 cstepi = int_cst_value (cstep);
2737 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
2739 /* TODO -- add direct handling of this case. */
2740 goto fallback;
2743 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
2744 return INFTY;
2746 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2747 or ratio == 1, it is better to handle this like
2749 ubase - ratio * cbase + ratio * var
2751 (also holds in the case ratio == -1, TODO. */
2753 if (TREE_CODE (cbase) == INTEGER_CST)
2755 offset = - ratio * int_cst_value (cbase);
2756 cost += difference_cost (data,
2757 ubase, integer_zero_node,
2758 &symbol_present, &var_present, &offset,
2759 depends_on);
2761 else if (ratio == 1)
2763 cost += difference_cost (data,
2764 ubase, cbase,
2765 &symbol_present, &var_present, &offset,
2766 depends_on);
2768 else
2770 cost += force_var_cost (data, cbase, depends_on);
2771 cost += add_cost (TYPE_MODE (ctype));
2772 cost += difference_cost (data,
2773 ubase, integer_zero_node,
2774 &symbol_present, &var_present, &offset,
2775 depends_on);
2778 /* If we are after the increment, the value of the candidate is higher by
2779 one iteration. */
2780 if (stmt_after_increment (data->current_loop, cand, at))
2781 offset -= ratio * cstepi;
2783 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
2784 (symbol/var/const parts may be omitted). If we are looking for an address,
2785 find the cost of addressing this. */
2786 if (address_p)
2787 return get_address_cost (symbol_present, var_present, offset, ratio);
2789 /* Otherwise estimate the costs for computing the expression. */
2790 aratio = ratio > 0 ? ratio : -ratio;
2791 if (!symbol_present && !var_present && !offset)
2793 if (ratio != 1)
2794 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
2796 return cost;
2799 if (aratio != 1)
2800 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
2802 n_sums = 1;
2803 if (var_present
2804 /* Symbol + offset should be compile-time computable. */
2805 && (symbol_present || offset))
2806 n_sums++;
2808 return cost + n_sums * add_cost (TYPE_MODE (ctype));
2810 fallback:
2812 /* Just get the expression, expand it and measure the cost. */
2813 tree comp = get_computation_at (data->current_loop, use, cand, at);
2815 if (!comp)
2816 return INFTY;
2818 if (address_p)
2819 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
2821 return computation_cost (comp);
2825 /* Determines the cost of the computation by that USE is expressed
2826 from induction variable CAND. If ADDRESS_P is true, we just need
2827 to create an address from it, otherwise we want to get it into
2828 register. A set of invariants we depend on is stored in
2829 DEPENDS_ON. */
2831 static unsigned
2832 get_computation_cost (struct ivopts_data *data,
2833 struct iv_use *use, struct iv_cand *cand,
2834 bool address_p, bitmap *depends_on)
2836 return get_computation_cost_at (data,
2837 use, cand, address_p, depends_on, use->stmt);
2840 /* Determines cost of basing replacement of USE on CAND in a generic
2841 expression. */
2843 static void
2844 determine_use_iv_cost_generic (struct ivopts_data *data,
2845 struct iv_use *use, struct iv_cand *cand)
2847 bitmap depends_on;
2848 unsigned cost = get_computation_cost (data, use, cand, false, &depends_on);
2850 set_use_iv_cost (data, use, cand, cost, depends_on);
2853 /* Determines cost of basing replacement of USE on CAND in an address. */
2855 static void
2856 determine_use_iv_cost_address (struct ivopts_data *data,
2857 struct iv_use *use, struct iv_cand *cand)
2859 bitmap depends_on;
2860 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
2862 set_use_iv_cost (data, use, cand, cost, depends_on);
2865 /* Computes value of induction variable IV in iteration NITER. */
2867 static tree
2868 iv_value (struct iv *iv, tree niter)
2870 tree val;
2871 tree type = TREE_TYPE (iv->base);
2873 niter = fold_convert (type, niter);
2874 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
2876 return fold (build2 (PLUS_EXPR, type, iv->base, val));
2879 /* Computes value of candidate CAND at position AT in iteration NITER. */
2881 static tree
2882 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
2884 tree val = iv_value (cand->iv, niter);
2885 tree type = TREE_TYPE (cand->iv->base);
2887 if (stmt_after_increment (loop, cand, at))
2888 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
2890 return val;
2893 /* Check whether it is possible to express the condition in USE by comparison
2894 of candidate CAND. If so, store the comparison code to COMPARE and the
2895 value compared with to BOUND. */
2897 static bool
2898 may_eliminate_iv (struct loop *loop,
2899 struct iv_use *use, struct iv_cand *cand,
2900 enum tree_code *compare, tree *bound)
2902 edge exit;
2903 struct tree_niter_desc *niter, new_niter;
2904 tree wider_type, type, base;
2906 /* For now just very primitive -- we work just for the single exit condition,
2907 and are quite conservative about the possible overflows. TODO -- both of
2908 these can be improved. */
2909 exit = single_dom_exit (loop);
2910 if (!exit)
2911 return false;
2912 if (use->stmt != last_stmt (exit->src))
2913 return false;
2915 niter = &loop_data (loop)->niter;
2916 if (!niter->niter
2917 || !integer_nonzerop (niter->assumptions)
2918 || !integer_zerop (niter->may_be_zero))
2919 return false;
2921 if (exit->flags & EDGE_TRUE_VALUE)
2922 *compare = EQ_EXPR;
2923 else
2924 *compare = NE_EXPR;
2926 *bound = cand_value_at (loop, cand, use->stmt, niter->niter);
2928 /* Let us check there is not some problem with overflows, by checking that
2929 the number of iterations is unchanged. */
2930 base = cand->iv->base;
2931 type = TREE_TYPE (base);
2932 if (stmt_after_increment (loop, cand, use->stmt))
2933 base = fold (build2 (PLUS_EXPR, type, base, cand->iv->step));
2935 new_niter.niter = NULL_TREE;
2936 number_of_iterations_cond (TREE_TYPE (cand->iv->base), base,
2937 cand->iv->step, NE_EXPR, *bound, NULL_TREE,
2938 &new_niter);
2939 if (!new_niter.niter
2940 || !integer_nonzerop (new_niter.assumptions)
2941 || !integer_zerop (new_niter.may_be_zero))
2942 return false;
2944 wider_type = TREE_TYPE (new_niter.niter);
2945 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (TREE_TYPE (niter->niter)))
2946 wider_type = TREE_TYPE (niter->niter);
2947 if (!operand_equal_p (fold_convert (wider_type, niter->niter),
2948 fold_convert (wider_type, new_niter.niter), 0))
2949 return false;
2951 return true;
2954 /* Determines cost of basing replacement of USE on CAND in a condition. */
2956 static void
2957 determine_use_iv_cost_condition (struct ivopts_data *data,
2958 struct iv_use *use, struct iv_cand *cand)
2960 tree bound;
2961 enum tree_code compare;
2963 /* Only consider real candidates. */
2964 if (!cand->iv)
2966 set_use_iv_cost (data, use, cand, INFTY, NULL);
2967 return;
2970 if (may_eliminate_iv (data->current_loop, use, cand, &compare, &bound))
2972 bitmap depends_on = NULL;
2973 unsigned cost = force_var_cost (data, bound, &depends_on);
2975 set_use_iv_cost (data, use, cand, cost, depends_on);
2976 return;
2979 /* The induction variable elimination failed; just express the original
2980 giv. If it is compared with an invariant, note that we cannot get
2981 rid of it. */
2982 if (TREE_CODE (*use->op_p) == SSA_NAME)
2983 record_invariant (data, *use->op_p, true);
2984 else
2986 record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
2987 record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
2990 determine_use_iv_cost_generic (data, use, cand);
2993 /* Checks whether it is possible to replace the final value of USE by
2994 a direct computation. If so, the formula is stored to *VALUE. */
2996 static bool
2997 may_replace_final_value (struct loop *loop, struct iv_use *use, tree *value)
2999 edge exit;
3000 struct tree_niter_desc *niter;
3002 exit = single_dom_exit (loop);
3003 if (!exit)
3004 return false;
3006 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3007 bb_for_stmt (use->stmt)));
3009 niter = &loop_data (loop)->niter;
3010 if (!niter->niter
3011 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
3012 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
3013 return false;
3015 *value = iv_value (use->iv, niter->niter);
3017 return true;
3020 /* Determines cost of replacing final value of USE using CAND. */
3022 static void
3023 determine_use_iv_cost_outer (struct ivopts_data *data,
3024 struct iv_use *use, struct iv_cand *cand)
3026 bitmap depends_on;
3027 unsigned cost;
3028 edge exit;
3029 tree value;
3030 struct loop *loop = data->current_loop;
3032 if (!cand->iv)
3034 if (!may_replace_final_value (loop, use, &value))
3036 set_use_iv_cost (data, use, cand, INFTY, NULL);
3037 return;
3040 depends_on = NULL;
3041 cost = force_var_cost (data, value, &depends_on);
3043 cost /= AVG_LOOP_NITER (loop);
3045 set_use_iv_cost (data, use, cand, cost, depends_on);
3046 return;
3049 exit = single_dom_exit (loop);
3050 if (exit)
3052 /* If there is just a single exit, we may use value of the candidate
3053 after we take it to determine the value of use. */
3054 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3055 last_stmt (exit->src));
3056 if (cost != INFTY)
3057 cost /= AVG_LOOP_NITER (loop);
3059 else
3061 /* Otherwise we just need to compute the iv. */
3062 cost = get_computation_cost (data, use, cand, false, &depends_on);
3065 set_use_iv_cost (data, use, cand, cost, depends_on);
3068 /* Determines cost of basing replacement of USE on CAND. */
3070 static void
3071 determine_use_iv_cost (struct ivopts_data *data,
3072 struct iv_use *use, struct iv_cand *cand)
3074 switch (use->type)
3076 case USE_NONLINEAR_EXPR:
3077 determine_use_iv_cost_generic (data, use, cand);
3078 break;
3080 case USE_OUTER:
3081 determine_use_iv_cost_outer (data, use, cand);
3082 break;
3084 case USE_ADDRESS:
3085 determine_use_iv_cost_address (data, use, cand);
3086 break;
3088 case USE_COMPARE:
3089 determine_use_iv_cost_condition (data, use, cand);
3090 break;
3092 default:
3093 gcc_unreachable ();
3097 /* Determines costs of basing the use of the iv on an iv candidate. */
3099 static void
3100 determine_use_iv_costs (struct ivopts_data *data)
3102 unsigned i, j;
3103 struct iv_use *use;
3104 struct iv_cand *cand;
3106 data->consider_all_candidates = (n_iv_cands (data)
3107 <= CONSIDER_ALL_CANDIDATES_BOUND);
3109 alloc_use_cost_map (data);
3111 if (!data->consider_all_candidates)
3113 /* Add the important candidate entries. */
3114 for (j = 0; j < n_iv_cands (data); j++)
3116 cand = iv_cand (data, j);
3117 if (!cand->important)
3118 continue;
3119 for (i = 0; i < n_iv_uses (data); i++)
3121 use = iv_use (data, i);
3122 determine_use_iv_cost (data, use, cand);
3127 for (i = 0; i < n_iv_uses (data); i++)
3129 use = iv_use (data, i);
3131 if (data->consider_all_candidates)
3133 for (j = 0; j < n_iv_cands (data); j++)
3135 cand = iv_cand (data, j);
3136 determine_use_iv_cost (data, use, cand);
3139 else
3141 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j,
3143 cand = iv_cand (data, j);
3144 if (!cand->important)
3145 determine_use_iv_cost (data, use, cand);
3150 if (dump_file && (dump_flags & TDF_DETAILS))
3152 fprintf (dump_file, "Use-candidate costs:\n");
3154 for (i = 0; i < n_iv_uses (data); i++)
3156 use = iv_use (data, i);
3158 fprintf (dump_file, "Use %d:\n", i);
3159 fprintf (dump_file, " cand\tcost\tdepends on\n");
3160 for (j = 0; j < use->n_map_members; j++)
3162 if (!use->cost_map[j].cand
3163 || use->cost_map[j].cost == INFTY)
3164 continue;
3166 fprintf (dump_file, " %d\t%d\t",
3167 use->cost_map[j].cand->id,
3168 use->cost_map[j].cost);
3169 if (use->cost_map[j].depends_on)
3170 bitmap_print (dump_file,
3171 use->cost_map[j].depends_on, "","");
3172 fprintf (dump_file, "\n");
3175 fprintf (dump_file, "\n");
3177 fprintf (dump_file, "\n");
3181 /* Determines cost of the candidate CAND. */
3183 static void
3184 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3186 unsigned cost_base, cost_step;
3187 tree base, last;
3188 basic_block bb;
3190 if (!cand->iv)
3192 cand->cost = 0;
3193 return;
3196 /* There are two costs associated with the candidate -- its increment
3197 and its initialization. The second is almost negligible for any loop
3198 that rolls enough, so we take it just very little into account. */
3200 base = cand->iv->base;
3201 cost_base = force_var_cost (data, base, NULL);
3202 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3204 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3206 /* Prefer the original iv unless we may gain something by replacing it. */
3207 if (cand->pos == IP_ORIGINAL)
3208 cand->cost--;
3210 /* Prefer not to insert statements into latch unless there are some
3211 already (so that we do not create unnecessary jumps). */
3212 if (cand->pos == IP_END)
3214 bb = ip_end_pos (data->current_loop);
3215 last = last_stmt (bb);
3217 if (!last
3218 || TREE_CODE (last) == LABEL_EXPR)
3219 cand->cost++;
3223 /* Determines costs of computation of the candidates. */
3225 static void
3226 determine_iv_costs (struct ivopts_data *data)
3228 unsigned i;
3230 if (dump_file && (dump_flags & TDF_DETAILS))
3232 fprintf (dump_file, "Candidate costs:\n");
3233 fprintf (dump_file, " cand\tcost\n");
3236 for (i = 0; i < n_iv_cands (data); i++)
3238 struct iv_cand *cand = iv_cand (data, i);
3240 determine_iv_cost (data, cand);
3242 if (dump_file && (dump_flags & TDF_DETAILS))
3243 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3246 if (dump_file && (dump_flags & TDF_DETAILS))
3247 fprintf (dump_file, "\n");
3250 /* Calculates cost for having SIZE induction variables. */
3252 static unsigned
3253 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3255 return global_cost_for_size (size,
3256 loop_data (data->current_loop)->regs_used,
3257 n_iv_uses (data));
3260 /* For each size of the induction variable set determine the penalty. */
3262 static void
3263 determine_set_costs (struct ivopts_data *data)
3265 unsigned j, n;
3266 tree phi, op;
3267 struct loop *loop = data->current_loop;
3269 /* We use the following model (definitely improvable, especially the
3270 cost function -- TODO):
3272 We estimate the number of registers available (using MD data), name it A.
3274 We estimate the number of registers used by the loop, name it U. This
3275 number is obtained as the number of loop phi nodes (not counting virtual
3276 registers and bivs) + the number of variables from outside of the loop.
3278 We set a reserve R (free regs that are used for temporary computations,
3279 etc.). For now the reserve is a constant 3.
3281 Let I be the number of induction variables.
3283 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3284 make a lot of ivs without a reason).
3285 -- if A - R < U + I <= A, the cost is I * PRES_COST
3286 -- if U + I > A, the cost is I * PRES_COST and
3287 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3289 if (dump_file && (dump_flags & TDF_DETAILS))
3291 fprintf (dump_file, "Global costs:\n");
3292 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3293 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3294 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3295 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3298 n = 0;
3299 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
3301 op = PHI_RESULT (phi);
3303 if (!is_gimple_reg (op))
3304 continue;
3306 if (get_iv (data, op))
3307 continue;
3309 n++;
3312 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j,
3314 struct version_info *info = ver_info (data, j);
3316 if (info->inv_id && info->has_nonlin_use)
3317 n++;
3320 loop_data (loop)->regs_used = n;
3321 if (dump_file && (dump_flags & TDF_DETAILS))
3322 fprintf (dump_file, " regs_used %d\n", n);
3324 if (dump_file && (dump_flags & TDF_DETAILS))
3326 fprintf (dump_file, " cost for size:\n");
3327 fprintf (dump_file, " ivs\tcost\n");
3328 for (j = 0; j <= 2 * target_avail_regs; j++)
3329 fprintf (dump_file, " %d\t%d\n", j,
3330 ivopts_global_cost_for_size (data, j));
3331 fprintf (dump_file, "\n");
3335 /* Finds a best candidate for USE and stores it to CAND. The candidates are
3336 taken from the set SOL and they may depend on invariants in the set INV.
3337 The really used candidate and invariants are noted to USED_IVS and
3338 USED_INV. */
3340 static unsigned
3341 find_best_candidate (struct ivopts_data *data,
3342 struct iv_use *use, bitmap sol, bitmap inv,
3343 bitmap used_ivs, bitmap used_inv, struct iv_cand **cand)
3345 unsigned c, d;
3346 unsigned best_cost = INFTY, cost;
3347 struct iv_cand *cnd = NULL, *acnd;
3348 bitmap depends_on = NULL, asol;
3350 if (data->consider_all_candidates)
3351 asol = sol;
3352 else
3354 asol = BITMAP_XMALLOC ();
3355 bitmap_a_and_b (asol, sol, use->related_cands);
3358 EXECUTE_IF_SET_IN_BITMAP (asol, 0, c,
3360 acnd = iv_cand (data, c);
3361 cost = get_use_iv_cost (data, use, acnd, &depends_on);
3363 if (cost == INFTY)
3364 goto next_cand;
3365 if (cost > best_cost)
3366 goto next_cand;
3367 if (cost == best_cost)
3369 /* Prefer the cheaper iv. */
3370 if (acnd->cost >= cnd->cost)
3371 goto next_cand;
3374 if (depends_on)
3376 EXECUTE_IF_AND_COMPL_IN_BITMAP (depends_on, inv, 0, d,
3377 goto next_cand);
3378 if (used_inv)
3379 bitmap_a_or_b (used_inv, used_inv, depends_on);
3382 cnd = acnd;
3383 best_cost = cost;
3384 next_cand: ;
3387 if (cnd && used_ivs)
3388 bitmap_set_bit (used_ivs, cnd->id);
3390 if (cand)
3391 *cand = cnd;
3393 if (!data->consider_all_candidates)
3394 BITMAP_XFREE (asol);
3396 return best_cost;
3399 /* Returns cost of set of ivs SOL + invariants INV. Removes unnecessary
3400 induction variable candidates and invariants from the sets. Only
3401 uses 0 .. MAX_USE - 1 are taken into account. */
3403 static unsigned
3404 set_cost_up_to (struct ivopts_data *data, bitmap sol, bitmap inv,
3405 unsigned max_use)
3407 unsigned i;
3408 unsigned cost = 0, size = 0, acost;
3409 struct iv_use *use;
3410 struct iv_cand *cand;
3411 bitmap used_ivs = BITMAP_XMALLOC (), used_inv = BITMAP_XMALLOC ();
3413 for (i = 0; i < max_use; i++)
3415 use = iv_use (data, i);
3416 acost = find_best_candidate (data, use, sol, inv,
3417 used_ivs, used_inv, NULL);
3418 if (acost == INFTY)
3420 BITMAP_XFREE (used_ivs);
3421 BITMAP_XFREE (used_inv);
3422 return INFTY;
3424 cost += acost;
3427 EXECUTE_IF_SET_IN_BITMAP (used_ivs, 0, i,
3429 cand = iv_cand (data, i);
3431 /* Do not count the pseudocandidates. */
3432 if (cand->iv)
3433 size++;
3435 cost += cand->cost;
3437 EXECUTE_IF_SET_IN_BITMAP (used_inv, 0, i, size++);
3438 cost += ivopts_global_cost_for_size (data, size);
3440 bitmap_copy (sol, used_ivs);
3441 bitmap_copy (inv, used_inv);
3443 BITMAP_XFREE (used_ivs);
3444 BITMAP_XFREE (used_inv);
3446 return cost;
3449 /* Computes cost of set of ivs SOL + invariants INV. Removes unnecessary
3450 induction variable candidates and invariants from the sets. */
3452 static unsigned
3453 set_cost (struct ivopts_data *data, bitmap sol, bitmap inv)
3455 return set_cost_up_to (data, sol, inv, n_iv_uses (data));
3458 /* Tries to extend the sets IVS and INV in the best possible way in order
3459 to express the USE. */
3461 static bool
3462 try_add_cand_for (struct ivopts_data *data, bitmap ivs, bitmap inv,
3463 struct iv_use *use)
3465 unsigned best_cost = set_cost_up_to (data, ivs, inv, use->id + 1), act_cost;
3466 bitmap best_ivs = BITMAP_XMALLOC ();
3467 bitmap best_inv = BITMAP_XMALLOC ();
3468 bitmap act_ivs = BITMAP_XMALLOC ();
3469 bitmap act_inv = BITMAP_XMALLOC ();
3470 unsigned i;
3471 struct cost_pair *cp;
3473 bitmap_copy (best_ivs, ivs);
3474 bitmap_copy (best_inv, inv);
3476 for (i = 0; i < use->n_map_members; i++)
3478 cp = use->cost_map + i;
3479 if (cp->cost == INFTY)
3480 continue;
3482 bitmap_copy (act_ivs, ivs);
3483 bitmap_set_bit (act_ivs, cp->cand->id);
3484 if (cp->depends_on)
3485 bitmap_a_or_b (act_inv, inv, cp->depends_on);
3486 else
3487 bitmap_copy (act_inv, inv);
3488 act_cost = set_cost_up_to (data, act_ivs, act_inv, use->id + 1);
3490 if (act_cost < best_cost)
3492 best_cost = act_cost;
3493 bitmap_copy (best_ivs, act_ivs);
3494 bitmap_copy (best_inv, act_inv);
3498 bitmap_copy (ivs, best_ivs);
3499 bitmap_copy (inv, best_inv);
3501 BITMAP_XFREE (best_ivs);
3502 BITMAP_XFREE (best_inv);
3503 BITMAP_XFREE (act_ivs);
3504 BITMAP_XFREE (act_inv);
3506 return (best_cost != INFTY);
3509 /* Finds an initial set of IVS and invariants INV. We do this by simply
3510 choosing the best candidate for each use. */
3512 static unsigned
3513 get_initial_solution (struct ivopts_data *data, bitmap ivs, bitmap inv)
3515 unsigned i;
3517 for (i = 0; i < n_iv_uses (data); i++)
3518 if (!try_add_cand_for (data, ivs, inv, iv_use (data, i)))
3519 return INFTY;
3521 return set_cost (data, ivs, inv);
3524 /* Tries to improve set of induction variables IVS and invariants INV to get
3525 it better than COST. */
3527 static bool
3528 try_improve_iv_set (struct ivopts_data *data,
3529 bitmap ivs, bitmap inv, unsigned *cost)
3531 unsigned i, acost;
3532 bitmap new_ivs = BITMAP_XMALLOC (), new_inv = BITMAP_XMALLOC ();
3533 bitmap best_new_ivs = NULL, best_new_inv = NULL;
3535 /* Try altering the set of induction variables by one. */
3536 for (i = 0; i < n_iv_cands (data); i++)
3538 bitmap_copy (new_ivs, ivs);
3539 bitmap_copy (new_inv, inv);
3541 if (bitmap_bit_p (ivs, i))
3542 bitmap_clear_bit (new_ivs, i);
3543 else
3544 bitmap_set_bit (new_ivs, i);
3546 acost = set_cost (data, new_ivs, new_inv);
3547 if (acost >= *cost)
3548 continue;
3550 if (!best_new_ivs)
3552 best_new_ivs = BITMAP_XMALLOC ();
3553 best_new_inv = BITMAP_XMALLOC ();
3556 *cost = acost;
3557 bitmap_copy (best_new_ivs, new_ivs);
3558 bitmap_copy (best_new_inv, new_inv);
3561 /* Ditto for invariants. */
3562 for (i = 1; i <= data->max_inv_id; i++)
3564 if (ver_info (data, i)->has_nonlin_use)
3565 continue;
3567 bitmap_copy (new_ivs, ivs);
3568 bitmap_copy (new_inv, inv);
3570 if (bitmap_bit_p (inv, i))
3571 bitmap_clear_bit (new_inv, i);
3572 else
3573 bitmap_set_bit (new_inv, i);
3575 acost = set_cost (data, new_ivs, new_inv);
3576 if (acost >= *cost)
3577 continue;
3579 if (!best_new_ivs)
3581 best_new_ivs = BITMAP_XMALLOC ();
3582 best_new_inv = BITMAP_XMALLOC ();
3585 *cost = acost;
3586 bitmap_copy (best_new_ivs, new_ivs);
3587 bitmap_copy (best_new_inv, new_inv);
3590 BITMAP_XFREE (new_ivs);
3591 BITMAP_XFREE (new_inv);
3593 if (!best_new_ivs)
3594 return false;
3596 bitmap_copy (ivs, best_new_ivs);
3597 bitmap_copy (inv, best_new_inv);
3598 BITMAP_XFREE (best_new_ivs);
3599 BITMAP_XFREE (best_new_inv);
3600 return true;
3603 /* Attempts to find the optimal set of induction variables. We do simple
3604 greedy heuristic -- we try to replace at most one candidate in the selected
3605 solution and remove the unused ivs while this improves the cost. */
3607 static bitmap
3608 find_optimal_iv_set (struct ivopts_data *data)
3610 unsigned cost, i;
3611 bitmap set = BITMAP_XMALLOC ();
3612 bitmap inv = BITMAP_XMALLOC ();
3613 struct iv_use *use;
3615 /* Set the upper bound. */
3616 cost = get_initial_solution (data, set, inv);
3617 if (cost == INFTY)
3619 if (dump_file && (dump_flags & TDF_DETAILS))
3620 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
3621 BITMAP_XFREE (inv);
3622 BITMAP_XFREE (set);
3623 return NULL;
3626 if (dump_file && (dump_flags & TDF_DETAILS))
3628 fprintf (dump_file, "Initial set of candidates (cost %d): ", cost);
3629 bitmap_print (dump_file, set, "", "");
3630 fprintf (dump_file, " invariants ");
3631 bitmap_print (dump_file, inv, "", "");
3632 fprintf (dump_file, "\n");
3635 while (try_improve_iv_set (data, set, inv, &cost))
3637 if (dump_file && (dump_flags & TDF_DETAILS))
3639 fprintf (dump_file, "Improved to (cost %d): ", cost);
3640 bitmap_print (dump_file, set, "", "");
3641 fprintf (dump_file, " invariants ");
3642 bitmap_print (dump_file, inv, "", "");
3643 fprintf (dump_file, "\n");
3647 if (dump_file && (dump_flags & TDF_DETAILS))
3648 fprintf (dump_file, "Final cost %d\n\n", cost);
3650 for (i = 0; i < n_iv_uses (data); i++)
3652 use = iv_use (data, i);
3653 find_best_candidate (data, use, set, inv, NULL, NULL, &use->selected);
3656 BITMAP_XFREE (inv);
3658 return set;
3661 /* Creates a new induction variable corresponding to CAND. */
3663 static void
3664 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
3666 block_stmt_iterator incr_pos;
3667 tree base;
3668 bool after = false;
3670 if (!cand->iv)
3671 return;
3673 switch (cand->pos)
3675 case IP_NORMAL:
3676 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
3677 break;
3679 case IP_END:
3680 incr_pos = bsi_last (ip_end_pos (data->current_loop));
3681 after = true;
3682 break;
3684 case IP_ORIGINAL:
3685 /* Mark that the iv is preserved. */
3686 name_info (data, cand->var_before)->preserve_biv = true;
3687 name_info (data, cand->var_after)->preserve_biv = true;
3689 /* Rewrite the increment so that it uses var_before directly. */
3690 find_interesting_uses_op (data, cand->var_after)->selected = cand;
3692 return;
3695 gimple_add_tmp_var (cand->var_before);
3696 add_referenced_tmp_var (cand->var_before);
3698 base = unshare_expr (cand->iv->base);
3700 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
3701 &incr_pos, after, &cand->var_before, &cand->var_after);
3704 /* Creates new induction variables described in SET. */
3706 static void
3707 create_new_ivs (struct ivopts_data *data, bitmap set)
3709 unsigned i;
3710 struct iv_cand *cand;
3712 EXECUTE_IF_SET_IN_BITMAP (set, 0, i,
3714 cand = iv_cand (data, i);
3715 create_new_iv (data, cand);
3719 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
3720 is true, remove also the ssa name defined by the statement. */
3722 static void
3723 remove_statement (tree stmt, bool including_defined_name)
3725 if (TREE_CODE (stmt) == PHI_NODE)
3727 if (!including_defined_name)
3729 /* Prevent the ssa name defined by the statement from being removed. */
3730 SET_PHI_RESULT (stmt, NULL);
3732 remove_phi_node (stmt, NULL_TREE, bb_for_stmt (stmt));
3734 else
3736 block_stmt_iterator bsi = stmt_for_bsi (stmt);
3738 bsi_remove (&bsi);
3742 /* Rewrites USE (definition of iv used in a nonlinear expression)
3743 using candidate CAND. */
3745 static void
3746 rewrite_use_nonlinear_expr (struct ivopts_data *data,
3747 struct iv_use *use, struct iv_cand *cand)
3749 tree comp = unshare_expr (get_computation (data->current_loop,
3750 use, cand));
3751 tree op, stmts, tgt, ass;
3752 block_stmt_iterator bsi, pbsi;
3754 switch (TREE_CODE (use->stmt))
3756 case PHI_NODE:
3757 tgt = PHI_RESULT (use->stmt);
3759 /* If we should keep the biv, do not replace it. */
3760 if (name_info (data, tgt)->preserve_biv)
3761 return;
3763 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
3764 while (!bsi_end_p (pbsi)
3765 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
3767 bsi = pbsi;
3768 bsi_next (&pbsi);
3770 break;
3772 case MODIFY_EXPR:
3773 tgt = TREE_OPERAND (use->stmt, 0);
3774 bsi = stmt_for_bsi (use->stmt);
3775 break;
3777 default:
3778 gcc_unreachable ();
3781 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
3783 if (TREE_CODE (use->stmt) == PHI_NODE)
3785 if (stmts)
3786 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
3787 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
3788 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
3789 remove_statement (use->stmt, false);
3790 SSA_NAME_DEF_STMT (tgt) = ass;
3792 else
3794 if (stmts)
3795 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3796 TREE_OPERAND (use->stmt, 1) = op;
3800 /* Replaces ssa name in index IDX by its basic variable. Callback for
3801 for_each_index. */
3803 static bool
3804 idx_remove_ssa_names (tree base ATTRIBUTE_UNUSED, tree *idx,
3805 void *data ATTRIBUTE_UNUSED)
3807 if (TREE_CODE (*idx) == SSA_NAME)
3808 *idx = SSA_NAME_VAR (*idx);
3809 return true;
3812 /* Unshares REF and replaces ssa names inside it by their basic variables. */
3814 static tree
3815 unshare_and_remove_ssa_names (tree ref)
3817 ref = unshare_expr (ref);
3818 for_each_index (&ref, idx_remove_ssa_names, NULL);
3820 return ref;
3823 /* Rewrites base of memory access OP with expression WITH in statement
3824 pointed to by BSI. */
3826 static void
3827 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
3829 tree var = get_base_address (*op), new_var, new_name, copy, name;
3830 tree orig;
3832 if (!var || TREE_CODE (with) != SSA_NAME)
3833 goto do_rewrite;
3835 if (TREE_CODE (var) == INDIRECT_REF)
3836 var = TREE_OPERAND (var, 0);
3837 if (TREE_CODE (var) == SSA_NAME)
3839 name = var;
3840 var = SSA_NAME_VAR (var);
3842 else if (DECL_P (var))
3843 name = NULL_TREE;
3844 else
3845 goto do_rewrite;
3847 if (var_ann (var)->type_mem_tag)
3848 var = var_ann (var)->type_mem_tag;
3850 /* We need to add a memory tag for the variable. But we do not want
3851 to add it to the temporary used for the computations, since this leads
3852 to problems in redundancy elimination when there are common parts
3853 in two computations referring to the different arrays. So we copy
3854 the variable to a new temporary. */
3855 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
3856 if (name)
3857 new_name = duplicate_ssa_name (name, copy);
3858 else
3860 new_var = create_tmp_var (TREE_TYPE (with), "ruatmp");
3861 add_referenced_tmp_var (new_var);
3862 var_ann (new_var)->type_mem_tag = var;
3863 new_name = make_ssa_name (new_var, copy);
3865 TREE_OPERAND (copy, 0) = new_name;
3866 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
3867 with = new_name;
3869 do_rewrite:
3871 orig = NULL_TREE;
3872 if (TREE_CODE (*op) == INDIRECT_REF)
3873 orig = REF_ORIGINAL (*op);
3874 if (!orig)
3875 orig = unshare_and_remove_ssa_names (*op);
3877 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
3878 /* Record the original reference, for purposes of alias analysis. */
3879 REF_ORIGINAL (*op) = orig;
3882 /* Rewrites USE (address that is an iv) using candidate CAND. */
3884 static void
3885 rewrite_use_address (struct ivopts_data *data,
3886 struct iv_use *use, struct iv_cand *cand)
3888 tree comp = unshare_expr (get_computation (data->current_loop,
3889 use, cand));
3890 block_stmt_iterator bsi = stmt_for_bsi (use->stmt);
3891 tree stmts;
3892 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
3894 if (stmts)
3895 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3897 rewrite_address_base (&bsi, use->op_p, op);
3900 /* Rewrites USE (the condition such that one of the arguments is an iv) using
3901 candidate CAND. */
3903 static void
3904 rewrite_use_compare (struct ivopts_data *data,
3905 struct iv_use *use, struct iv_cand *cand)
3907 tree comp;
3908 tree *op_p, cond, op, stmts, bound;
3909 block_stmt_iterator bsi = stmt_for_bsi (use->stmt);
3910 enum tree_code compare;
3912 if (may_eliminate_iv (data->current_loop,
3913 use, cand, &compare, &bound))
3915 op = force_gimple_operand (unshare_expr (bound), &stmts,
3916 true, NULL_TREE);
3918 if (stmts)
3919 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3921 *use->op_p = build2 (compare, boolean_type_node,
3922 var_at_stmt (data->current_loop,
3923 cand, use->stmt), op);
3924 modify_stmt (use->stmt);
3925 return;
3928 /* The induction variable elimination failed; just express the original
3929 giv. */
3930 comp = unshare_expr (get_computation (data->current_loop, use, cand));
3932 cond = *use->op_p;
3933 op_p = &TREE_OPERAND (cond, 0);
3934 if (TREE_CODE (*op_p) != SSA_NAME
3935 || zero_p (get_iv (data, *op_p)->step))
3936 op_p = &TREE_OPERAND (cond, 1);
3938 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
3939 if (stmts)
3940 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3942 *op_p = op;
3945 /* Ensure that operand *OP_P may be used at the end of EXIT without
3946 violating loop closed ssa form. */
3948 static void
3949 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
3951 basic_block def_bb;
3952 struct loop *def_loop;
3953 tree phi, use;
3955 use = USE_FROM_PTR (op_p);
3956 if (TREE_CODE (use) != SSA_NAME)
3957 return;
3959 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
3960 if (!def_bb)
3961 return;
3963 def_loop = def_bb->loop_father;
3964 if (flow_bb_inside_loop_p (def_loop, exit->dest))
3965 return;
3967 /* Try finding a phi node that copies the value out of the loop. */
3968 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
3969 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
3970 break;
3972 if (!phi)
3974 /* Create such a phi node. */
3975 tree new_name = duplicate_ssa_name (use, NULL);
3977 phi = create_phi_node (new_name, exit->dest);
3978 SSA_NAME_DEF_STMT (new_name) = phi;
3979 add_phi_arg (&phi, use, exit);
3982 SET_USE (op_p, PHI_RESULT (phi));
3985 /* Ensure that operands of STMT may be used at the end of EXIT without
3986 violating loop closed ssa form. */
3988 static void
3989 protect_loop_closed_ssa_form (edge exit, tree stmt)
3991 use_optype uses;
3992 vuse_optype vuses;
3993 v_may_def_optype v_may_defs;
3994 unsigned i;
3996 get_stmt_operands (stmt);
3998 uses = STMT_USE_OPS (stmt);
3999 for (i = 0; i < NUM_USES (uses); i++)
4000 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4002 vuses = STMT_VUSE_OPS (stmt);
4003 for (i = 0; i < NUM_VUSES (vuses); i++)
4004 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
4006 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4007 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
4008 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
4011 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
4012 so that they are emitted on the correct place, and so that the loop closed
4013 ssa form is preserved. */
4015 static void
4016 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
4018 tree_stmt_iterator tsi;
4019 block_stmt_iterator bsi;
4020 tree phi, stmt, def, next;
4022 if (exit->dest->pred->pred_next)
4023 split_loop_exit_edge (exit);
4025 if (TREE_CODE (stmts) == STATEMENT_LIST)
4027 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
4028 protect_loop_closed_ssa_form (exit, tsi_stmt (tsi));
4030 else
4031 protect_loop_closed_ssa_form (exit, stmts);
4033 /* Ensure there is label in exit->dest, so that we can
4034 insert after it. */
4035 tree_block_label (exit->dest);
4036 bsi = bsi_after_labels (exit->dest);
4037 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4039 if (!op)
4040 return;
4042 for (phi = phi_nodes (exit->dest); phi; phi = next)
4044 next = TREE_CHAIN (phi);
4046 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
4048 def = PHI_RESULT (phi);
4049 remove_statement (phi, false);
4050 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
4051 def, op);
4052 SSA_NAME_DEF_STMT (def) = stmt;
4053 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
4058 /* Rewrites the final value of USE (that is only needed outside of the loop)
4059 using candidate CAND. */
4061 static void
4062 rewrite_use_outer (struct ivopts_data *data,
4063 struct iv_use *use, struct iv_cand *cand)
4065 edge exit;
4066 tree value, op, stmts, tgt;
4067 tree phi;
4069 switch (TREE_CODE (use->stmt))
4071 case PHI_NODE:
4072 tgt = PHI_RESULT (use->stmt);
4073 break;
4074 case MODIFY_EXPR:
4075 tgt = TREE_OPERAND (use->stmt, 0);
4076 break;
4077 default:
4078 gcc_unreachable ();
4081 exit = single_dom_exit (data->current_loop);
4083 if (exit)
4085 if (!cand->iv)
4087 bool ok = may_replace_final_value (data->current_loop, use, &value);
4088 gcc_assert (ok);
4090 else
4091 value = get_computation_at (data->current_loop,
4092 use, cand, last_stmt (exit->src));
4094 value = unshare_expr (value);
4095 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
4097 /* If we will preserve the iv anyway and we would need to perform
4098 some computation to replace the final value, do nothing. */
4099 if (stmts && name_info (data, tgt)->preserve_biv)
4100 return;
4102 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
4104 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
4106 if (USE_FROM_PTR (use_p) == tgt)
4107 SET_USE (use_p, op);
4110 if (stmts)
4111 compute_phi_arg_on_exit (exit, stmts, op);
4113 /* Enable removal of the statement. We cannot remove it directly,
4114 since we may still need the aliasing information attached to the
4115 ssa name defined by it. */
4116 name_info (data, tgt)->iv->have_use_for = false;
4117 return;
4120 /* If the variable is going to be preserved anyway, there is nothing to
4121 do. */
4122 if (name_info (data, tgt)->preserve_biv)
4123 return;
4125 /* Otherwise we just need to compute the iv. */
4126 rewrite_use_nonlinear_expr (data, use, cand);
4129 /* Rewrites USE using candidate CAND. */
4131 static void
4132 rewrite_use (struct ivopts_data *data,
4133 struct iv_use *use, struct iv_cand *cand)
4135 switch (use->type)
4137 case USE_NONLINEAR_EXPR:
4138 rewrite_use_nonlinear_expr (data, use, cand);
4139 break;
4141 case USE_OUTER:
4142 rewrite_use_outer (data, use, cand);
4143 break;
4145 case USE_ADDRESS:
4146 rewrite_use_address (data, use, cand);
4147 break;
4149 case USE_COMPARE:
4150 rewrite_use_compare (data, use, cand);
4151 break;
4153 default:
4154 gcc_unreachable ();
4156 modify_stmt (use->stmt);
4159 /* Rewrite the uses using the selected induction variables. */
4161 static void
4162 rewrite_uses (struct ivopts_data *data)
4164 unsigned i;
4165 struct iv_cand *cand;
4166 struct iv_use *use;
4168 for (i = 0; i < n_iv_uses (data); i++)
4170 use = iv_use (data, i);
4171 cand = use->selected;
4172 gcc_assert (cand);
4174 rewrite_use (data, use, cand);
4178 /* Removes the ivs that are not used after rewriting. */
4180 static void
4181 remove_unused_ivs (struct ivopts_data *data)
4183 unsigned j;
4185 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j,
4187 struct version_info *info;
4189 info = ver_info (data, j);
4190 if (info->iv
4191 && !zero_p (info->iv->step)
4192 && !info->inv_id
4193 && !info->iv->have_use_for
4194 && !info->preserve_biv)
4195 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
4199 /* Frees data allocated by the optimization of a single loop. */
4201 static void
4202 free_loop_data (struct ivopts_data *data)
4204 unsigned i, j;
4206 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
4208 struct version_info *info;
4210 info = ver_info (data, i);
4211 if (info->iv)
4212 free (info->iv);
4213 info->iv = NULL;
4214 info->has_nonlin_use = false;
4215 info->preserve_biv = false;
4216 info->inv_id = 0;
4218 bitmap_clear (data->relevant);
4220 for (i = 0; i < n_iv_uses (data); i++)
4222 struct iv_use *use = iv_use (data, i);
4224 free (use->iv);
4225 BITMAP_XFREE (use->related_cands);
4226 for (j = 0; j < use->n_map_members; j++)
4227 if (use->cost_map[j].depends_on)
4228 BITMAP_XFREE (use->cost_map[j].depends_on);
4229 free (use->cost_map);
4230 free (use);
4232 VARRAY_POP_ALL (data->iv_uses);
4234 for (i = 0; i < n_iv_cands (data); i++)
4236 struct iv_cand *cand = iv_cand (data, i);
4238 if (cand->iv)
4239 free (cand->iv);
4240 free (cand);
4242 VARRAY_POP_ALL (data->iv_candidates);
4244 if (data->version_info_size < num_ssa_names)
4246 data->version_info_size = 2 * num_ssa_names;
4247 free (data->version_info);
4248 data->version_info = xcalloc (data->version_info_size,
4249 sizeof (struct version_info));
4252 data->max_inv_id = 0;
4254 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
4256 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
4258 SET_DECL_RTL (obj, NULL_RTX);
4260 VARRAY_POP_ALL (decl_rtl_to_reset);
4263 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
4264 loop tree. */
4266 static void
4267 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
4269 unsigned i;
4271 for (i = 1; i < loops->num; i++)
4272 if (loops->parray[i])
4274 free (loops->parray[i]->aux);
4275 loops->parray[i]->aux = NULL;
4278 free_loop_data (data);
4279 free (data->version_info);
4280 BITMAP_XFREE (data->relevant);
4282 VARRAY_FREE (decl_rtl_to_reset);
4283 VARRAY_FREE (data->iv_uses);
4284 VARRAY_FREE (data->iv_candidates);
4287 /* Optimizes the LOOP. Returns true if anything changed. */
4289 static bool
4290 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
4292 bool changed = false;
4293 bitmap iv_set;
4294 edge exit;
4296 data->current_loop = loop;
4298 if (dump_file && (dump_flags & TDF_DETAILS))
4300 fprintf (dump_file, "Processing loop %d\n", loop->num);
4302 exit = single_dom_exit (loop);
4303 if (exit)
4305 fprintf (dump_file, " single exit %d -> %d, exit condition ",
4306 exit->src->index, exit->dest->index);
4307 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
4308 fprintf (dump_file, "\n");
4311 fprintf (dump_file, "\n");
4314 /* For each ssa name determines whether it behaves as an induction variable
4315 in some loop. */
4316 if (!find_induction_variables (data))
4317 goto finish;
4319 /* Finds interesting uses (item 1). */
4320 find_interesting_uses (data);
4321 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
4322 goto finish;
4324 /* Finds candidates for the induction variables (item 2). */
4325 find_iv_candidates (data);
4327 /* Calculates the costs (item 3, part 1). */
4328 determine_use_iv_costs (data);
4329 determine_iv_costs (data);
4330 determine_set_costs (data);
4332 /* Find the optimal set of induction variables (item 3, part 2). */
4333 iv_set = find_optimal_iv_set (data);
4334 if (!iv_set)
4335 goto finish;
4336 changed = true;
4338 /* Create the new induction variables (item 4, part 1). */
4339 create_new_ivs (data, iv_set);
4341 /* Rewrite the uses (item 4, part 2). */
4342 rewrite_uses (data);
4344 /* Remove the ivs that are unused after rewriting. */
4345 remove_unused_ivs (data);
4347 loop_commit_inserts ();
4349 BITMAP_XFREE (iv_set);
4351 /* We have changed the structure of induction variables; it might happen
4352 that definitions in the scev database refer to some of them that were
4353 eliminated. */
4354 scev_reset ();
4356 finish:
4357 free_loop_data (data);
4359 return changed;
4362 /* Main entry point. Optimizes induction variables in LOOPS. */
4364 void
4365 tree_ssa_iv_optimize (struct loops *loops)
4367 struct loop *loop;
4368 struct ivopts_data data;
4370 tree_ssa_iv_optimize_init (loops, &data);
4372 /* Optimize the loops starting with the innermost ones. */
4373 loop = loops->tree_root;
4374 while (loop->inner)
4375 loop = loop->inner;
4377 #ifdef ENABLE_CHECKING
4378 verify_loop_closed_ssa ();
4379 verify_stmts ();
4380 #endif
4382 /* Scan the loops, inner ones first. */
4383 while (loop != loops->tree_root)
4385 if (dump_file && (dump_flags & TDF_DETAILS))
4386 flow_loop_dump (loop, dump_file, NULL, 1);
4387 if (tree_ssa_iv_optimize_loop (&data, loop))
4389 #ifdef ENABLE_CHECKING
4390 verify_loop_closed_ssa ();
4391 verify_stmts ();
4392 #endif
4395 if (loop->next)
4397 loop = loop->next;
4398 while (loop->inner)
4399 loop = loop->inner;
4401 else
4402 loop = loop->outer;
4405 tree_ssa_iv_optimize_finalize (loops, &data);