PR tree-optimization/17468
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blobc8e3762a5e9601ba18dd8566436dd37e45d3672f
1 /* Induction variable optimizations.
2 Copyright (C) 2003 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
92 /* The infinite cost. */
93 #define INFTY 10000000
95 /* The expected number of loop iterations. TODO -- use profiling instead of
96 this. */
97 #define AVG_LOOP_NITER(LOOP) 5
99 /* Just to shorten the ugly names. */
100 #define EXEC_BINARY nondestructive_fold_binary_to_constant
101 #define EXEC_UNARY nondestructive_fold_unary_to_constant
103 /* Representation of the induction variable. */
104 struct iv
106 tree base; /* Initial value of the iv. */
107 tree step; /* Step of the iv (constant only). */
108 tree ssa_name; /* The ssa name with the value. */
109 bool biv_p; /* Is it a biv? */
110 bool have_use_for; /* Do we already have a use for it? */
111 unsigned use_id; /* The identifier in the use if it is the case. */
114 /* Per-ssa version information (induction variable descriptions, etc.). */
115 struct version_info
117 tree name; /* The ssa name. */
118 struct iv *iv; /* Induction variable description. */
119 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
120 an expression that is not an induction variable. */
121 unsigned inv_id; /* Id of an invariant. */
122 bool preserve_biv; /* For the original biv, whether to preserve it. */
125 /* Information attached to loop. */
126 struct loop_data
128 struct tree_niter_desc niter;
129 /* Number of iterations. */
131 unsigned regs_used; /* Number of registers used. */
134 /* Types of uses. */
135 enum use_type
137 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
138 USE_OUTER, /* The induction variable is used outside the loop. */
139 USE_ADDRESS, /* Use in an address. */
140 USE_COMPARE /* Use is a compare. */
143 /* The candidate - cost pair. */
144 struct cost_pair
146 struct iv_cand *cand; /* The candidate. */
147 unsigned cost; /* The cost. */
148 bitmap depends_on; /* The list of invariants that have to be
149 preserved. */
152 /* Use. */
153 struct iv_use
155 unsigned id; /* The id of the use. */
156 enum use_type type; /* Type of the use. */
157 struct iv *iv; /* The induction variable it is based on. */
158 tree stmt; /* Statement in that it occurs. */
159 tree *op_p; /* The place where it occurs. */
160 bitmap related_cands; /* The set of "related" iv candidates. */
162 unsigned n_map_members; /* Number of candidates in the cost_map list. */
163 struct cost_pair *cost_map;
164 /* The costs wrto the iv candidates. */
166 struct iv_cand *selected;
167 /* The selected candidate. */
170 /* The position where the iv is computed. */
171 enum iv_position
173 IP_NORMAL, /* At the end, just before the exit condition. */
174 IP_END, /* At the end of the latch block. */
175 IP_ORIGINAL /* The original biv. */
178 /* The induction variable candidate. */
179 struct iv_cand
181 unsigned id; /* The number of the candidate. */
182 bool important; /* Whether this is an "important" candidate, i.e. such
183 that it should be considered by all uses. */
184 enum iv_position pos; /* Where it is computed. */
185 tree incremented_at; /* For original biv, the statement where it is
186 incremented. */
187 tree var_before; /* The variable used for it before increment. */
188 tree var_after; /* The variable used for it after increment. */
189 struct iv *iv; /* The value of the candidate. NULL for
190 "pseudocandidate" used to indicate the possibility
191 to replace the final value of an iv by direct
192 computation of the value. */
193 unsigned cost; /* Cost of the candidate. */
196 /* The data used by the induction variable optimizations. */
198 struct ivopts_data
200 /* The currently optimized loop. */
201 struct loop *current_loop;
203 /* The size of version_info array allocated. */
204 unsigned version_info_size;
206 /* The array of information for the ssa names. */
207 struct version_info *version_info;
209 /* The bitmap of indices in version_info whose value was changed. */
210 bitmap relevant;
212 /* The maximum invariant id. */
213 unsigned max_inv_id;
215 /* The uses of induction variables. */
216 varray_type iv_uses;
218 /* The candidates. */
219 varray_type iv_candidates;
221 /* Whether to consider just related and important candidates when replacing a
222 use. */
223 bool consider_all_candidates;
226 /* Bound on number of candidates below that all candidates are considered. */
228 #define CONSIDER_ALL_CANDIDATES_BOUND \
229 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
231 /* If there are more iv occurrences, we just give up (it is quite unlikely that
232 optimizing such a loop would help, and it would take ages). */
234 #define MAX_CONSIDERED_USES \
235 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
237 /* The list of trees for that the decl_rtl field must be reset is stored
238 here. */
240 static varray_type decl_rtl_to_reset;
242 /* Number of uses recorded in DATA. */
244 static inline unsigned
245 n_iv_uses (struct ivopts_data *data)
247 return VARRAY_ACTIVE_SIZE (data->iv_uses);
250 /* Ith use recorded in DATA. */
252 static inline struct iv_use *
253 iv_use (struct ivopts_data *data, unsigned i)
255 return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
258 /* Number of candidates recorded in DATA. */
260 static inline unsigned
261 n_iv_cands (struct ivopts_data *data)
263 return VARRAY_ACTIVE_SIZE (data->iv_candidates);
266 /* Ith candidate recorded in DATA. */
268 static inline struct iv_cand *
269 iv_cand (struct ivopts_data *data, unsigned i)
271 return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
274 /* The data for LOOP. */
276 static inline struct loop_data *
277 loop_data (struct loop *loop)
279 return loop->aux;
282 /* The single loop exit if it dominates the latch, NULL otherwise. */
284 static edge
285 single_dom_exit (struct loop *loop)
287 edge exit = loop->single_exit;
289 if (!exit)
290 return NULL;
292 if (!just_once_each_iteration_p (loop, exit->src))
293 return NULL;
295 return exit;
298 /* Dumps information about the induction variable IV to FILE. */
300 extern void dump_iv (FILE *, struct iv *);
301 void
302 dump_iv (FILE *file, struct iv *iv)
304 fprintf (file, "ssa name ");
305 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
306 fprintf (file, "\n");
308 if (iv->step)
310 fprintf (file, " base ");
311 print_generic_expr (file, iv->base, TDF_SLIM);
312 fprintf (file, "\n");
314 fprintf (file, " step ");
315 print_generic_expr (file, iv->step, TDF_SLIM);
316 fprintf (file, "\n");
318 else
320 fprintf (file, " invariant ");
321 print_generic_expr (file, iv->base, TDF_SLIM);
322 fprintf (file, "\n");
325 if (iv->biv_p)
326 fprintf (file, " is a biv\n");
329 /* Dumps information about the USE to FILE. */
331 extern void dump_use (FILE *, struct iv_use *);
332 void
333 dump_use (FILE *file, struct iv_use *use)
335 struct iv *iv = use->iv;
337 fprintf (file, "use %d\n", use->id);
339 switch (use->type)
341 case USE_NONLINEAR_EXPR:
342 fprintf (file, " generic\n");
343 break;
345 case USE_OUTER:
346 fprintf (file, " outside\n");
347 break;
349 case USE_ADDRESS:
350 fprintf (file, " address\n");
351 break;
353 case USE_COMPARE:
354 fprintf (file, " compare\n");
355 break;
357 default:
358 gcc_unreachable ();
361 fprintf (file, " in statement ");
362 print_generic_expr (file, use->stmt, TDF_SLIM);
363 fprintf (file, "\n");
365 fprintf (file, " at position ");
366 if (use->op_p)
367 print_generic_expr (file, *use->op_p, TDF_SLIM);
368 fprintf (file, "\n");
370 if (iv->step)
372 fprintf (file, " base ");
373 print_generic_expr (file, iv->base, TDF_SLIM);
374 fprintf (file, "\n");
376 fprintf (file, " step ");
377 print_generic_expr (file, iv->step, TDF_SLIM);
378 fprintf (file, "\n");
380 else
382 fprintf (file, " invariant ");
383 print_generic_expr (file, iv->base, TDF_SLIM);
384 fprintf (file, "\n");
387 fprintf (file, " related candidates ");
388 dump_bitmap (file, use->related_cands);
391 /* Dumps information about the uses to FILE. */
393 extern void dump_uses (FILE *, struct ivopts_data *);
394 void
395 dump_uses (FILE *file, struct ivopts_data *data)
397 unsigned i;
398 struct iv_use *use;
400 for (i = 0; i < n_iv_uses (data); i++)
402 use = iv_use (data, i);
404 dump_use (file, use);
405 fprintf (file, "\n");
409 /* Dumps information about induction variable candidate CAND to FILE. */
411 extern void dump_cand (FILE *, struct iv_cand *);
412 void
413 dump_cand (FILE *file, struct iv_cand *cand)
415 struct iv *iv = cand->iv;
417 fprintf (file, "candidate %d%s\n",
418 cand->id, cand->important ? " (important)" : "");
420 if (!iv)
422 fprintf (file, " final value replacement\n");
423 return;
426 switch (cand->pos)
428 case IP_NORMAL:
429 fprintf (file, " incremented before exit test\n");
430 break;
432 case IP_END:
433 fprintf (file, " incremented at end\n");
434 break;
436 case IP_ORIGINAL:
437 fprintf (file, " original biv\n");
438 break;
441 if (iv->step)
443 fprintf (file, " base ");
444 print_generic_expr (file, iv->base, TDF_SLIM);
445 fprintf (file, "\n");
447 fprintf (file, " step ");
448 print_generic_expr (file, iv->step, TDF_SLIM);
449 fprintf (file, "\n");
451 else
453 fprintf (file, " invariant ");
454 print_generic_expr (file, iv->base, TDF_SLIM);
455 fprintf (file, "\n");
459 /* Returns the info for ssa version VER. */
461 static inline struct version_info *
462 ver_info (struct ivopts_data *data, unsigned ver)
464 return data->version_info + ver;
467 /* Returns the info for ssa name NAME. */
469 static inline struct version_info *
470 name_info (struct ivopts_data *data, tree name)
472 return ver_info (data, SSA_NAME_VERSION (name));
475 /* Checks whether there exists number X such that X * B = A, counting modulo
476 2^BITS. */
478 static bool
479 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
480 HOST_WIDE_INT *x)
482 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
483 unsigned HOST_WIDE_INT inv, ex, val;
484 unsigned i;
486 a &= mask;
487 b &= mask;
489 /* First divide the whole equation by 2 as long as possible. */
490 while (!(a & 1) && !(b & 1))
492 a >>= 1;
493 b >>= 1;
494 bits--;
495 mask >>= 1;
498 if (!(b & 1))
500 /* If b is still even, a is odd and there is no such x. */
501 return false;
504 /* Find the inverse of b. We compute it as
505 b^(2^(bits - 1) - 1) (mod 2^bits). */
506 inv = 1;
507 ex = b;
508 for (i = 0; i < bits - 1; i++)
510 inv = (inv * ex) & mask;
511 ex = (ex * ex) & mask;
514 val = (a * inv) & mask;
516 gcc_assert (((val * b) & mask) == a);
518 if ((val >> (bits - 1)) & 1)
519 val |= ~mask;
521 *x = val;
523 return true;
526 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
527 emitted in LOOP. */
529 static bool
530 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
532 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
534 gcc_assert (bb);
536 if (sbb == loop->latch)
537 return true;
539 if (sbb != bb)
540 return false;
542 return stmt == last_stmt (bb);
545 /* Returns true if STMT if after the place where the original induction
546 variable CAND is incremented. */
548 static bool
549 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
551 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
552 basic_block stmt_bb = bb_for_stmt (stmt);
553 block_stmt_iterator bsi;
555 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
556 return false;
558 if (stmt_bb != cand_bb)
559 return true;
561 /* Scan the block from the end, since the original ivs are usually
562 incremented at the end of the loop body. */
563 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
565 if (bsi_stmt (bsi) == cand->incremented_at)
566 return false;
567 if (bsi_stmt (bsi) == stmt)
568 return true;
572 /* Returns true if STMT if after the place where the induction variable
573 CAND is incremented in LOOP. */
575 static bool
576 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
578 switch (cand->pos)
580 case IP_END:
581 return false;
583 case IP_NORMAL:
584 return stmt_after_ip_normal_pos (loop, stmt);
586 case IP_ORIGINAL:
587 return stmt_after_ip_original_pos (cand, stmt);
589 default:
590 gcc_unreachable ();
594 /* Initializes data structures used by the iv optimization pass, stored
595 in DATA. LOOPS is the loop tree. */
597 static void
598 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
600 unsigned i;
602 data->version_info_size = 2 * num_ssa_names;
603 data->version_info = xcalloc (data->version_info_size,
604 sizeof (struct version_info));
605 data->relevant = BITMAP_XMALLOC ();
606 data->max_inv_id = 0;
608 for (i = 1; i < loops->num; i++)
609 if (loops->parray[i])
610 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
612 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
613 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
614 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
617 /* Allocates an induction variable with given initial value BASE and step STEP
618 for loop LOOP. */
620 static struct iv *
621 alloc_iv (tree base, tree step)
623 struct iv *iv = xcalloc (1, sizeof (struct iv));
625 if (step && integer_zerop (step))
626 step = NULL_TREE;
628 iv->base = base;
629 iv->step = step;
630 iv->biv_p = false;
631 iv->have_use_for = false;
632 iv->use_id = 0;
633 iv->ssa_name = NULL_TREE;
635 return iv;
638 /* Sets STEP and BASE for induction variable IV. */
640 static void
641 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
643 struct version_info *info = name_info (data, iv);
645 gcc_assert (!info->iv);
647 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
648 info->iv = alloc_iv (base, step);
649 info->iv->ssa_name = iv;
652 /* Finds induction variable declaration for VAR. */
654 static struct iv *
655 get_iv (struct ivopts_data *data, tree var)
657 basic_block bb;
659 if (!name_info (data, var)->iv)
661 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
663 if (!bb
664 || !flow_bb_inside_loop_p (data->current_loop, bb))
665 set_iv (data, var, var, NULL_TREE);
668 return name_info (data, var)->iv;
671 /* Determines the step of a biv defined in PHI. */
673 static tree
674 determine_biv_step (tree phi)
676 struct loop *loop = bb_for_stmt (phi)->loop_father;
677 tree name = PHI_RESULT (phi), base, step;
678 tree type = TREE_TYPE (name);
680 if (!is_gimple_reg (name))
681 return NULL_TREE;
683 if (!simple_iv (loop, phi, name, &base, &step))
684 return NULL_TREE;
686 if (!step)
687 return build_int_cst (type, 0);
689 return step;
692 /* Returns false if INDEX is a ssa name that occurs in an
693 abnormal phi node. Callback for for_each_index. */
695 static bool
696 idx_contains_abnormal_ssa_name_p (tree base ATTRIBUTE_UNUSED, tree *index,
697 void *data ATTRIBUTE_UNUSED)
699 if (TREE_CODE (*index) != SSA_NAME)
700 return true;
702 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*index) == 0;
705 /* Returns true if EXPR contains a ssa name that occurs in an
706 abnormal phi node. */
708 static bool
709 contains_abnormal_ssa_name_p (tree expr)
711 enum tree_code code = TREE_CODE (expr);
712 char class = TREE_CODE_CLASS (code);
714 if (code == SSA_NAME)
715 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
717 if (code == INTEGER_CST
718 || is_gimple_min_invariant (expr))
719 return false;
721 if (code == ADDR_EXPR)
722 return !for_each_index (&TREE_OPERAND (expr, 1),
723 idx_contains_abnormal_ssa_name_p,
724 NULL);
726 switch (class)
728 case '2':
729 case '<':
730 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
731 return true;
733 /* Fallthru. */
734 case '1':
735 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
736 return true;
738 break;
740 default:
741 gcc_unreachable ();
744 return false;
747 /* Finds basic ivs. */
749 static bool
750 find_bivs (struct ivopts_data *data)
752 tree phi, step, type, base;
753 bool found = false;
754 struct loop *loop = data->current_loop;
756 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
758 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
759 continue;
761 step = determine_biv_step (phi);
763 if (!step)
764 continue;
765 if (cst_and_fits_in_hwi (step)
766 && int_cst_value (step) == 0)
767 continue;
769 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
770 if (contains_abnormal_ssa_name_p (base))
771 continue;
773 type = TREE_TYPE (PHI_RESULT (phi));
774 base = fold_convert (type, base);
775 step = fold_convert (type, step);
777 /* FIXME: We do not handle induction variables whose step does
778 not satisfy cst_and_fits_in_hwi. */
779 if (!cst_and_fits_in_hwi (step))
780 continue;
782 set_iv (data, PHI_RESULT (phi), base, step);
783 found = true;
786 return found;
789 /* Marks basic ivs. */
791 static void
792 mark_bivs (struct ivopts_data *data)
794 tree phi, var;
795 struct iv *iv, *incr_iv;
796 struct loop *loop = data->current_loop;
797 basic_block incr_bb;
799 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
801 iv = get_iv (data, PHI_RESULT (phi));
802 if (!iv)
803 continue;
805 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
806 incr_iv = get_iv (data, var);
807 if (!incr_iv)
808 continue;
810 /* If the increment is in the subloop, ignore it. */
811 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
812 if (incr_bb->loop_father != data->current_loop
813 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
814 continue;
816 iv->biv_p = true;
817 incr_iv->biv_p = true;
821 /* Checks whether STMT defines a linear induction variable and stores its
822 parameters to BASE and STEP. */
824 static bool
825 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
826 tree *base, tree *step)
828 tree lhs;
829 struct loop *loop = data->current_loop;
831 *base = NULL_TREE;
832 *step = NULL_TREE;
834 if (TREE_CODE (stmt) != MODIFY_EXPR)
835 return false;
837 lhs = TREE_OPERAND (stmt, 0);
838 if (TREE_CODE (lhs) != SSA_NAME)
839 return false;
841 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
842 return false;
844 /* FIXME: We do not handle induction variables whose step does
845 not satisfy cst_and_fits_in_hwi. */
846 if (!zero_p (*step)
847 && !cst_and_fits_in_hwi (*step))
848 return false;
850 if (contains_abnormal_ssa_name_p (*base))
851 return false;
853 return true;
856 /* Finds general ivs in statement STMT. */
858 static void
859 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
861 tree base, step;
863 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
864 return;
866 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
869 /* Finds general ivs in basic block BB. */
871 static void
872 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
874 block_stmt_iterator bsi;
876 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
877 find_givs_in_stmt (data, bsi_stmt (bsi));
880 /* Finds general ivs. */
882 static void
883 find_givs (struct ivopts_data *data)
885 struct loop *loop = data->current_loop;
886 basic_block *body = get_loop_body_in_dom_order (loop);
887 unsigned i;
889 for (i = 0; i < loop->num_nodes; i++)
890 find_givs_in_bb (data, body[i]);
891 free (body);
894 /* Determine the number of iterations of the current loop. */
896 static void
897 determine_number_of_iterations (struct ivopts_data *data)
899 struct loop *loop = data->current_loop;
900 edge exit = single_dom_exit (loop);
902 if (!exit)
903 return;
905 number_of_iterations_exit (loop, exit, &loop_data (loop)->niter);
908 /* For each ssa name defined in LOOP determines whether it is an induction
909 variable and if so, its initial value and step. */
911 static bool
912 find_induction_variables (struct ivopts_data *data)
914 unsigned i;
915 struct loop *loop = data->current_loop;
917 if (!find_bivs (data))
918 return false;
920 find_givs (data);
921 mark_bivs (data);
922 determine_number_of_iterations (data);
924 if (dump_file && (dump_flags & TDF_DETAILS))
926 if (loop_data (loop)->niter.niter)
928 fprintf (dump_file, " number of iterations ");
929 print_generic_expr (dump_file, loop_data (loop)->niter.niter,
930 TDF_SLIM);
931 fprintf (dump_file, "\n");
933 fprintf (dump_file, " may be zero if ");
934 print_generic_expr (dump_file, loop_data (loop)->niter.may_be_zero,
935 TDF_SLIM);
936 fprintf (dump_file, "\n");
938 fprintf (dump_file, " bogus unless ");
939 print_generic_expr (dump_file, loop_data (loop)->niter.assumptions,
940 TDF_SLIM);
941 fprintf (dump_file, "\n");
942 fprintf (dump_file, "\n");
945 fprintf (dump_file, "Induction variables:\n\n");
947 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
949 if (ver_info (data, i)->iv)
950 dump_iv (dump_file, ver_info (data, i)->iv);
954 return true;
957 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
959 static struct iv_use *
960 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
961 tree stmt, enum use_type use_type)
963 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
965 use->id = n_iv_uses (data);
966 use->type = use_type;
967 use->iv = iv;
968 use->stmt = stmt;
969 use->op_p = use_p;
970 use->related_cands = BITMAP_XMALLOC ();
972 if (dump_file && (dump_flags & TDF_DETAILS))
973 dump_use (dump_file, use);
975 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
977 return use;
980 /* Checks whether OP is a loop-level invariant and if so, records it.
981 NONLINEAR_USE is true if the invariant is used in a way we do not
982 handle specially. */
984 static void
985 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
987 basic_block bb;
988 struct version_info *info;
990 if (TREE_CODE (op) != SSA_NAME
991 || !is_gimple_reg (op))
992 return;
994 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
995 if (bb
996 && flow_bb_inside_loop_p (data->current_loop, bb))
997 return;
999 info = name_info (data, op);
1000 info->name = op;
1001 info->has_nonlin_use |= nonlinear_use;
1002 if (!info->inv_id)
1003 info->inv_id = ++data->max_inv_id;
1004 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1007 /* Checks whether the use OP is interesting and if so, records it
1008 as TYPE. */
1010 static struct iv_use *
1011 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1012 enum use_type type)
1014 struct iv *iv;
1015 struct iv *civ;
1016 tree stmt;
1017 struct iv_use *use;
1019 if (TREE_CODE (op) != SSA_NAME)
1020 return NULL;
1022 iv = get_iv (data, op);
1023 if (!iv)
1024 return NULL;
1026 if (iv->have_use_for)
1028 use = iv_use (data, iv->use_id);
1030 gcc_assert (use->type == USE_NONLINEAR_EXPR
1031 || use->type == USE_OUTER);
1033 if (type == USE_NONLINEAR_EXPR)
1034 use->type = USE_NONLINEAR_EXPR;
1035 return use;
1038 if (zero_p (iv->step))
1040 record_invariant (data, op, true);
1041 return NULL;
1043 iv->have_use_for = true;
1045 civ = xmalloc (sizeof (struct iv));
1046 *civ = *iv;
1048 stmt = SSA_NAME_DEF_STMT (op);
1049 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1050 || TREE_CODE (stmt) == MODIFY_EXPR);
1052 use = record_use (data, NULL, civ, stmt, type);
1053 iv->use_id = use->id;
1055 return use;
1058 /* Checks whether the use OP is interesting and if so, records it. */
1060 static struct iv_use *
1061 find_interesting_uses_op (struct ivopts_data *data, tree op)
1063 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1066 /* Records a definition of induction variable OP that is used outside of the
1067 loop. */
1069 static struct iv_use *
1070 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1072 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1075 /* Checks whether the condition *COND_P in STMT is interesting
1076 and if so, records it. */
1078 static void
1079 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1081 tree *op0_p;
1082 tree *op1_p;
1083 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1084 struct iv const_iv;
1085 tree zero = integer_zero_node;
1087 const_iv.step = NULL_TREE;
1089 if (integer_zerop (*cond_p)
1090 || integer_nonzerop (*cond_p))
1091 return;
1093 if (TREE_CODE (*cond_p) == SSA_NAME)
1095 op0_p = cond_p;
1096 op1_p = &zero;
1098 else
1100 op0_p = &TREE_OPERAND (*cond_p, 0);
1101 op1_p = &TREE_OPERAND (*cond_p, 1);
1104 if (TREE_CODE (*op0_p) == SSA_NAME)
1105 iv0 = get_iv (data, *op0_p);
1106 else
1107 iv0 = &const_iv;
1109 if (TREE_CODE (*op1_p) == SSA_NAME)
1110 iv1 = get_iv (data, *op1_p);
1111 else
1112 iv1 = &const_iv;
1114 if (/* When comparing with non-invariant value, we may not do any senseful
1115 induction variable elimination. */
1116 (!iv0 || !iv1)
1117 /* Eliminating condition based on two ivs would be nontrivial.
1118 ??? TODO -- it is not really important to handle this case. */
1119 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1121 find_interesting_uses_op (data, *op0_p);
1122 find_interesting_uses_op (data, *op1_p);
1123 return;
1126 if (zero_p (iv0->step) && zero_p (iv1->step))
1128 /* If both are invariants, this is a work for unswitching. */
1129 return;
1132 civ = xmalloc (sizeof (struct iv));
1133 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1134 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1137 /* Cumulates the steps of indices into DATA and replaces their values with the
1138 initial ones. Returns false when the value of the index cannot be determined.
1139 Callback for for_each_index. */
1141 struct ifs_ivopts_data
1143 struct ivopts_data *ivopts_data;
1144 tree stmt;
1145 tree *step_p;
1148 static bool
1149 idx_find_step (tree base, tree *idx, void *data)
1151 struct ifs_ivopts_data *dta = data;
1152 struct iv *iv;
1153 tree step, type, iv_type, iv_step;
1155 if (TREE_CODE (*idx) != SSA_NAME)
1156 return true;
1158 iv = get_iv (dta->ivopts_data, *idx);
1159 if (!iv)
1160 return false;
1162 *idx = iv->base;
1164 if (!iv->step)
1165 return true;
1167 iv_type = TREE_TYPE (iv->base);
1168 type = build_pointer_type (TREE_TYPE (base));
1169 if (TREE_CODE (base) == ARRAY_REF)
1170 step = array_ref_element_size (base);
1171 else
1172 /* The step for pointer arithmetics already is 1 byte. */
1173 step = build_int_cst (type, 1);
1175 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1176 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1177 type, iv->base, iv->step, dta->stmt);
1178 else
1179 iv_step = fold_convert (iv_type, iv->step);
1181 if (!iv_step)
1183 /* The index might wrap. */
1184 return false;
1187 step = EXEC_BINARY (MULT_EXPR, type, step, iv_step);
1189 if (!*dta->step_p)
1190 *dta->step_p = step;
1191 else
1192 *dta->step_p = EXEC_BINARY (PLUS_EXPR, type, *dta->step_p, step);
1194 return true;
1197 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1198 object is passed to it in DATA. */
1200 static bool
1201 idx_record_use (tree base ATTRIBUTE_UNUSED, tree *idx,
1202 void *data)
1204 find_interesting_uses_op (data, *idx);
1205 return true;
1208 /* Finds addresses in *OP_P inside STMT. */
1210 static void
1211 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1213 tree base = unshare_expr (*op_p), step = NULL;
1214 struct iv *civ;
1215 struct ifs_ivopts_data ifs_ivopts_data;
1217 /* Ignore bitfields for now. Not really something terribly complicated
1218 to handle. TODO. */
1219 if (TREE_CODE (base) == COMPONENT_REF
1220 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1221 goto fail;
1223 ifs_ivopts_data.ivopts_data = data;
1224 ifs_ivopts_data.stmt = stmt;
1225 ifs_ivopts_data.step_p = &step;
1226 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1227 || zero_p (step))
1228 goto fail;
1230 if (TREE_CODE (base) == INDIRECT_REF)
1231 base = TREE_OPERAND (base, 0);
1232 else
1233 base = build_addr (base);
1235 civ = alloc_iv (base, step);
1236 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1237 return;
1239 fail:
1240 for_each_index (op_p, idx_record_use, data);
1243 /* Finds and records invariants used in STMT. */
1245 static void
1246 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1248 use_optype uses = NULL;
1249 unsigned i, n;
1250 tree op;
1252 if (TREE_CODE (stmt) == PHI_NODE)
1253 n = PHI_NUM_ARGS (stmt);
1254 else
1256 get_stmt_operands (stmt);
1257 uses = STMT_USE_OPS (stmt);
1258 n = NUM_USES (uses);
1261 for (i = 0; i < n; i++)
1263 if (TREE_CODE (stmt) == PHI_NODE)
1264 op = PHI_ARG_DEF (stmt, i);
1265 else
1266 op = USE_OP (uses, i);
1268 record_invariant (data, op, false);
1272 /* Finds interesting uses of induction variables in the statement STMT. */
1274 static void
1275 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1277 struct iv *iv;
1278 tree op, lhs, rhs;
1279 use_optype uses = NULL;
1280 unsigned i, n;
1282 find_invariants_stmt (data, stmt);
1284 if (TREE_CODE (stmt) == COND_EXPR)
1286 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1287 return;
1290 if (TREE_CODE (stmt) == MODIFY_EXPR)
1292 lhs = TREE_OPERAND (stmt, 0);
1293 rhs = TREE_OPERAND (stmt, 1);
1295 if (TREE_CODE (lhs) == SSA_NAME)
1297 /* If the statement defines an induction variable, the uses are not
1298 interesting by themselves. */
1300 iv = get_iv (data, lhs);
1302 if (iv && !zero_p (iv->step))
1303 return;
1306 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1308 case '<':
1309 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1310 return;
1312 case 'r':
1313 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1314 if (TREE_CODE_CLASS (TREE_CODE (lhs)) == 'r')
1315 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1316 return;
1318 default: ;
1321 if (TREE_CODE_CLASS (TREE_CODE (lhs)) == 'r')
1323 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1324 find_interesting_uses_op (data, rhs);
1325 return;
1329 if (TREE_CODE (stmt) == PHI_NODE
1330 && bb_for_stmt (stmt) == data->current_loop->header)
1332 lhs = PHI_RESULT (stmt);
1333 iv = get_iv (data, lhs);
1335 if (iv && !zero_p (iv->step))
1336 return;
1339 if (TREE_CODE (stmt) == PHI_NODE)
1340 n = PHI_NUM_ARGS (stmt);
1341 else
1343 uses = STMT_USE_OPS (stmt);
1344 n = NUM_USES (uses);
1347 for (i = 0; i < n; i++)
1349 if (TREE_CODE (stmt) == PHI_NODE)
1350 op = PHI_ARG_DEF (stmt, i);
1351 else
1352 op = USE_OP (uses, i);
1354 if (TREE_CODE (op) != SSA_NAME)
1355 continue;
1357 iv = get_iv (data, op);
1358 if (!iv)
1359 continue;
1361 find_interesting_uses_op (data, op);
1365 /* Finds interesting uses of induction variables outside of loops
1366 on loop exit edge EXIT. */
1368 static void
1369 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1371 tree phi, def;
1373 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
1375 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1376 find_interesting_uses_outer (data, def);
1380 /* Finds uses of the induction variables that are interesting. */
1382 static void
1383 find_interesting_uses (struct ivopts_data *data)
1385 basic_block bb;
1386 block_stmt_iterator bsi;
1387 tree phi;
1388 basic_block *body = get_loop_body (data->current_loop);
1389 unsigned i;
1390 struct version_info *info;
1391 edge e;
1393 if (dump_file && (dump_flags & TDF_DETAILS))
1394 fprintf (dump_file, "Uses:\n\n");
1396 for (i = 0; i < data->current_loop->num_nodes; i++)
1398 bb = body[i];
1400 for (e = bb->succ; e; e = e->succ_next)
1401 if (e->dest != EXIT_BLOCK_PTR
1402 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1403 find_interesting_uses_outside (data, e);
1405 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
1406 find_interesting_uses_stmt (data, phi);
1407 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1408 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1411 if (dump_file && (dump_flags & TDF_DETAILS))
1413 fprintf (dump_file, "\n");
1415 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
1417 info = ver_info (data, i);
1418 if (info->inv_id)
1420 fprintf (dump_file, " ");
1421 print_generic_expr (dump_file, info->name, TDF_SLIM);
1422 fprintf (dump_file, " is invariant (%d)%s\n",
1423 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1427 fprintf (dump_file, "\n");
1430 free (body);
1433 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1434 position to POS. If USE is not NULL, the candidate is set as related to
1435 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1436 replacement of the final value of the iv by a direct computation. */
1438 static struct iv_cand *
1439 add_candidate_1 (struct ivopts_data *data,
1440 tree base, tree step, bool important, enum iv_position pos,
1441 struct iv_use *use, tree incremented_at)
1443 unsigned i;
1444 struct iv_cand *cand = NULL;
1445 tree type;
1447 if (base)
1449 type = TREE_TYPE (base);
1450 if (!TYPE_UNSIGNED (type))
1452 type = unsigned_type_for (type);
1453 base = fold_convert (type, base);
1454 if (step)
1455 step = fold_convert (type, step);
1459 for (i = 0; i < n_iv_cands (data); i++)
1461 cand = iv_cand (data, i);
1463 if (cand->pos != pos)
1464 continue;
1466 if (cand->incremented_at != incremented_at)
1467 continue;
1469 if (!cand->iv)
1471 if (!base && !step)
1472 break;
1474 continue;
1477 if (!base && !step)
1478 continue;
1480 if (!operand_equal_p (base, cand->iv->base, 0))
1481 continue;
1483 if (zero_p (cand->iv->step))
1485 if (zero_p (step))
1486 break;
1488 else
1490 if (step && operand_equal_p (step, cand->iv->step, 0))
1491 break;
1495 if (i == n_iv_cands (data))
1497 cand = xcalloc (1, sizeof (struct iv_cand));
1498 cand->id = i;
1500 if (!base && !step)
1501 cand->iv = NULL;
1502 else
1503 cand->iv = alloc_iv (base, step);
1505 cand->pos = pos;
1506 if (pos != IP_ORIGINAL && cand->iv)
1508 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1509 cand->var_after = cand->var_before;
1511 cand->important = important;
1512 cand->incremented_at = incremented_at;
1513 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1515 if (dump_file && (dump_flags & TDF_DETAILS))
1516 dump_cand (dump_file, cand);
1519 if (important && !cand->important)
1521 cand->important = true;
1522 if (dump_file && (dump_flags & TDF_DETAILS))
1523 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1526 if (use)
1528 bitmap_set_bit (use->related_cands, i);
1529 if (dump_file && (dump_flags & TDF_DETAILS))
1530 fprintf (dump_file, "Candidate %d is related to use %d\n",
1531 cand->id, use->id);
1534 return cand;
1537 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1538 position to POS. If USE is not NULL, the candidate is set as related to
1539 it. The candidate computation is scheduled on all available positions. */
1541 static void
1542 add_candidate (struct ivopts_data *data,
1543 tree base, tree step, bool important, struct iv_use *use)
1545 if (ip_normal_pos (data->current_loop))
1546 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
1547 if (ip_end_pos (data->current_loop))
1548 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
1551 /* Adds standard iv candidates. */
1553 static void
1554 add_standard_iv_candidates (struct ivopts_data *data)
1556 /* Add 0 + 1 * iteration candidate. */
1557 add_candidate (data,
1558 build_int_cst (unsigned_intSI_type_node, 0),
1559 build_int_cst (unsigned_intSI_type_node, 1),
1560 true, NULL);
1562 /* The same for a long type if it is still fast enough. */
1563 if (BITS_PER_WORD > 32)
1564 add_candidate (data,
1565 build_int_cst (unsigned_intDI_type_node, 0),
1566 build_int_cst (unsigned_intDI_type_node, 1),
1567 true, NULL);
1571 /* Adds candidates bases on the old induction variable IV. */
1573 static void
1574 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
1576 tree phi, def;
1577 struct iv_cand *cand;
1579 add_candidate (data, iv->base, iv->step, true, NULL);
1581 /* The same, but with initial value zero. */
1582 add_candidate (data,
1583 build_int_cst (TREE_TYPE (iv->base), 0),
1584 iv->step, true, NULL);
1586 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
1587 if (TREE_CODE (phi) == PHI_NODE)
1589 /* Additionally record the possibility of leaving the original iv
1590 untouched. */
1591 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
1592 cand = add_candidate_1 (data,
1593 iv->base, iv->step, true, IP_ORIGINAL, NULL,
1594 SSA_NAME_DEF_STMT (def));
1595 cand->var_before = iv->ssa_name;
1596 cand->var_after = def;
1600 /* Adds candidates based on the old induction variables. */
1602 static void
1603 add_old_ivs_candidates (struct ivopts_data *data)
1605 unsigned i;
1606 struct iv *iv;
1608 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
1610 iv = ver_info (data, i)->iv;
1611 if (iv && iv->biv_p && !zero_p (iv->step))
1612 add_old_iv_candidates (data, iv);
1616 /* Adds candidates based on the value of the induction variable IV and USE. */
1618 static void
1619 add_iv_value_candidates (struct ivopts_data *data,
1620 struct iv *iv, struct iv_use *use)
1622 add_candidate (data, iv->base, iv->step, false, use);
1624 /* The same, but with initial value zero. */
1625 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
1626 iv->step, false, use);
1629 /* Adds candidates based on the address IV and USE. */
1631 static void
1632 add_address_candidates (struct ivopts_data *data,
1633 struct iv *iv, struct iv_use *use)
1635 tree base, abase, tmp, *act;
1637 /* First, the trivial choices. */
1638 add_iv_value_candidates (data, iv, use);
1640 /* Second, try removing the COMPONENT_REFs. */
1641 if (TREE_CODE (iv->base) == ADDR_EXPR)
1643 base = TREE_OPERAND (iv->base, 0);
1644 while (TREE_CODE (base) == COMPONENT_REF
1645 || (TREE_CODE (base) == ARRAY_REF
1646 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
1647 base = TREE_OPERAND (base, 0);
1649 if (base != TREE_OPERAND (iv->base, 0))
1651 if (TREE_CODE (base) == INDIRECT_REF)
1652 base = TREE_OPERAND (base, 0);
1653 else
1654 base = build_addr (base);
1655 add_candidate (data, base, iv->step, false, use);
1659 /* Third, try removing the constant offset. */
1660 abase = iv->base;
1661 while (TREE_CODE (abase) == PLUS_EXPR
1662 && TREE_CODE (TREE_OPERAND (abase, 1)) != INTEGER_CST)
1663 abase = TREE_OPERAND (abase, 0);
1664 /* We found the offset, so make the copy of the non-shared part and
1665 remove it. */
1666 if (TREE_CODE (abase) == PLUS_EXPR)
1668 tmp = iv->base;
1669 act = &base;
1671 for (tmp = iv->base; tmp != abase; tmp = TREE_OPERAND (tmp, 0))
1673 *act = build2 (PLUS_EXPR, TREE_TYPE (tmp),
1674 NULL_TREE, TREE_OPERAND (tmp, 1));
1675 act = &TREE_OPERAND (*act, 0);
1677 *act = TREE_OPERAND (tmp, 0);
1679 add_candidate (data, base, iv->step, false, use);
1683 /* Possibly adds pseudocandidate for replacing the final value of USE by
1684 a direct computation. */
1686 static void
1687 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
1689 struct tree_niter_desc *niter;
1690 struct loop *loop = data->current_loop;
1692 /* We must know where we exit the loop and how many times does it roll. */
1693 if (!single_dom_exit (loop))
1694 return;
1696 niter = &loop_data (loop)->niter;
1697 if (!niter->niter
1698 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
1699 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
1700 return;
1702 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
1705 /* Adds candidates based on the uses. */
1707 static void
1708 add_derived_ivs_candidates (struct ivopts_data *data)
1710 unsigned i;
1712 for (i = 0; i < n_iv_uses (data); i++)
1714 struct iv_use *use = iv_use (data, i);
1716 if (!use)
1717 continue;
1719 switch (use->type)
1721 case USE_NONLINEAR_EXPR:
1722 case USE_COMPARE:
1723 /* Just add the ivs based on the value of the iv used here. */
1724 add_iv_value_candidates (data, use->iv, use);
1725 break;
1727 case USE_OUTER:
1728 add_iv_value_candidates (data, use->iv, use);
1730 /* Additionally, add the pseudocandidate for the possibility to
1731 replace the final value by a direct computation. */
1732 add_iv_outer_candidates (data, use);
1733 break;
1735 case USE_ADDRESS:
1736 add_address_candidates (data, use->iv, use);
1737 break;
1739 default:
1740 gcc_unreachable ();
1745 /* Finds the candidates for the induction variables. */
1747 static void
1748 find_iv_candidates (struct ivopts_data *data)
1750 /* Add commonly used ivs. */
1751 add_standard_iv_candidates (data);
1753 /* Add old induction variables. */
1754 add_old_ivs_candidates (data);
1756 /* Add induction variables derived from uses. */
1757 add_derived_ivs_candidates (data);
1760 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
1761 If consider_all_candidates is true, we use a two-dimensional array, otherwise
1762 we allocate a simple list to every use. */
1764 static void
1765 alloc_use_cost_map (struct ivopts_data *data)
1767 unsigned i, n_imp = 0, size, j;
1769 if (!data->consider_all_candidates)
1771 for (i = 0; i < n_iv_cands (data); i++)
1773 struct iv_cand *cand = iv_cand (data, i);
1774 if (cand->important)
1775 n_imp++;
1779 for (i = 0; i < n_iv_uses (data); i++)
1781 struct iv_use *use = iv_use (data, i);
1783 if (data->consider_all_candidates)
1785 size = n_iv_cands (data);
1786 use->n_map_members = size;
1788 else
1790 size = n_imp;
1791 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, size++);
1792 use->n_map_members = 0;
1795 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
1799 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
1800 on invariants DEPENDS_ON. */
1802 static void
1803 set_use_iv_cost (struct ivopts_data *data,
1804 struct iv_use *use, struct iv_cand *cand, unsigned cost,
1805 bitmap depends_on)
1807 if (cost == INFTY
1808 && depends_on)
1810 BITMAP_XFREE (depends_on);
1811 depends_on = NULL;
1814 if (data->consider_all_candidates)
1816 use->cost_map[cand->id].cand = cand;
1817 use->cost_map[cand->id].cost = cost;
1818 use->cost_map[cand->id].depends_on = depends_on;
1819 return;
1822 if (cost == INFTY)
1823 return;
1825 use->cost_map[use->n_map_members].cand = cand;
1826 use->cost_map[use->n_map_members].cost = cost;
1827 use->cost_map[use->n_map_members].depends_on = depends_on;
1828 use->n_map_members++;
1831 /* Gets cost of (USE, CANDIDATE) pair. Stores the bitmap of dependencies to
1832 DEPENDS_ON. */
1834 static unsigned
1835 get_use_iv_cost (struct ivopts_data *data,
1836 struct iv_use *use, struct iv_cand *cand, bitmap *depends_on)
1838 unsigned i;
1840 if (!cand)
1841 return INFTY;
1843 if (data->consider_all_candidates)
1844 i = cand->id;
1845 else
1847 for (i = 0; i < use->n_map_members; i++)
1848 if (use->cost_map[i].cand == cand)
1849 break;
1851 if (i == use->n_map_members)
1852 return INFTY;
1855 if (depends_on)
1856 *depends_on = use->cost_map[i].depends_on;
1857 return use->cost_map[i].cost;
1860 /* Returns estimate on cost of computing SEQ. */
1862 static unsigned
1863 seq_cost (rtx seq)
1865 unsigned cost = 0;
1866 rtx set;
1868 for (; seq; seq = NEXT_INSN (seq))
1870 set = single_set (seq);
1871 if (set)
1872 cost += rtx_cost (set, SET);
1873 else
1874 cost++;
1877 return cost;
1880 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
1881 static rtx
1882 produce_memory_decl_rtl (tree obj, int *regno)
1884 rtx x;
1885 if (!obj)
1886 abort ();
1887 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
1889 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
1890 x = gen_rtx_SYMBOL_REF (Pmode, name);
1892 else
1893 x = gen_raw_REG (Pmode, (*regno)++);
1895 return gen_rtx_MEM (DECL_MODE (obj), x);
1898 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
1899 walk_tree. DATA contains the actual fake register number. */
1901 static tree
1902 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
1904 tree obj = NULL_TREE;
1905 rtx x = NULL_RTX;
1906 int *regno = data;
1908 switch (TREE_CODE (*expr_p))
1910 case ADDR_EXPR:
1911 for (expr_p = &TREE_OPERAND (*expr_p, 0);
1912 (handled_component_p (*expr_p)
1913 || TREE_CODE (*expr_p) == REALPART_EXPR
1914 || TREE_CODE (*expr_p) == IMAGPART_EXPR);
1915 expr_p = &TREE_OPERAND (*expr_p, 0));
1916 obj = *expr_p;
1917 if (DECL_P (obj))
1918 x = produce_memory_decl_rtl (obj, regno);
1919 break;
1921 case SSA_NAME:
1922 *ws = 0;
1923 obj = SSA_NAME_VAR (*expr_p);
1924 if (!DECL_RTL_SET_P (obj))
1925 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
1926 break;
1928 case VAR_DECL:
1929 case PARM_DECL:
1930 case RESULT_DECL:
1931 *ws = 0;
1932 obj = *expr_p;
1934 if (DECL_RTL_SET_P (obj))
1935 break;
1937 if (DECL_MODE (obj) == BLKmode)
1938 x = produce_memory_decl_rtl (obj, regno);
1939 else
1940 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
1942 break;
1944 default:
1945 break;
1948 if (x)
1950 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
1951 SET_DECL_RTL (obj, x);
1954 return NULL_TREE;
1957 /* Determines cost of the computation of EXPR. */
1959 static unsigned
1960 computation_cost (tree expr)
1962 rtx seq, rslt;
1963 tree type = TREE_TYPE (expr);
1964 unsigned cost;
1965 int regno = 0;
1967 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
1968 start_sequence ();
1969 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
1970 seq = get_insns ();
1971 end_sequence ();
1973 cost = seq_cost (seq);
1974 if (GET_CODE (rslt) == MEM)
1975 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
1977 return cost;
1980 /* Returns variable containing the value of candidate CAND at statement AT. */
1982 static tree
1983 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
1985 if (stmt_after_increment (loop, cand, stmt))
1986 return cand->var_after;
1987 else
1988 return cand->var_before;
1991 /* Determines the expression by that USE is expressed from induction variable
1992 CAND at statement AT in LOOP. */
1994 static tree
1995 get_computation_at (struct loop *loop,
1996 struct iv_use *use, struct iv_cand *cand, tree at)
1998 tree ubase = unsave_expr_now (use->iv->base);
1999 tree ustep = unsave_expr_now (use->iv->step);
2000 tree cbase = unsave_expr_now (cand->iv->base);
2001 tree cstep = unsave_expr_now (cand->iv->step);
2002 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2003 tree uutype;
2004 tree expr, delta;
2005 tree ratio;
2006 unsigned HOST_WIDE_INT ustepi, cstepi;
2007 HOST_WIDE_INT ratioi;
2009 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2011 /* We do not have a precision to express the values of use. */
2012 return NULL_TREE;
2015 expr = var_at_stmt (loop, cand, at);
2017 if (TREE_TYPE (expr) != ctype)
2019 /* This may happen with the original ivs. */
2020 expr = fold_convert (ctype, expr);
2023 if (TYPE_UNSIGNED (utype))
2024 uutype = utype;
2025 else
2027 uutype = unsigned_type_for (utype);
2028 ubase = fold_convert (uutype, ubase);
2029 ustep = fold_convert (uutype, ustep);
2032 if (uutype != ctype)
2034 expr = fold_convert (uutype, expr);
2035 cbase = fold_convert (uutype, cbase);
2036 cstep = fold_convert (uutype, cstep);
2039 if (!cst_and_fits_in_hwi (cstep)
2040 || !cst_and_fits_in_hwi (ustep))
2041 return NULL_TREE;
2043 ustepi = int_cst_value (ustep);
2044 cstepi = int_cst_value (cstep);
2046 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2048 /* TODO maybe consider case when ustep divides cstep and the ratio is
2049 a power of 2 (so that the division is fast to execute)? We would
2050 need to be much more careful with overflows etc. then. */
2051 return NULL_TREE;
2054 /* We may need to shift the value if we are after the increment. */
2055 if (stmt_after_increment (loop, cand, at))
2056 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2058 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2059 or |ratio| == 1, it is better to handle this like
2061 ubase - ratio * cbase + ratio * var. */
2063 if (ratioi == 1)
2065 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2066 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2068 else if (ratioi == -1)
2070 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2071 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2073 else if (TREE_CODE (cbase) == INTEGER_CST)
2075 ratio = build_int_cst_type (uutype, ratioi);
2076 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2077 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2078 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2079 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2081 else
2083 expr = fold (build2 (MINUS_EXPR, uutype, expr, cbase));
2084 ratio = build_int_cst_type (uutype, ratioi);
2085 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2086 expr = fold (build2 (PLUS_EXPR, uutype, ubase, expr));
2089 return fold_convert (utype, expr);
2092 /* Determines the expression by that USE is expressed from induction variable
2093 CAND in LOOP. */
2095 static tree
2096 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2098 return get_computation_at (loop, use, cand, use->stmt);
2101 /* Strips constant offsets from EXPR and adds them to OFFSET. */
2103 static void
2104 strip_offset (tree *expr, unsigned HOST_WIDE_INT *offset)
2106 tree op0, op1;
2107 enum tree_code code;
2109 while (1)
2111 if (cst_and_fits_in_hwi (*expr))
2113 *offset += int_cst_value (*expr);
2114 *expr = integer_zero_node;
2115 return;
2118 code = TREE_CODE (*expr);
2120 if (code != PLUS_EXPR && code != MINUS_EXPR)
2121 return;
2123 op0 = TREE_OPERAND (*expr, 0);
2124 op1 = TREE_OPERAND (*expr, 1);
2126 if (cst_and_fits_in_hwi (op1))
2128 if (code == PLUS_EXPR)
2129 *offset += int_cst_value (op1);
2130 else
2131 *offset -= int_cst_value (op1);
2133 *expr = op0;
2134 continue;
2137 if (code != PLUS_EXPR)
2138 return;
2140 if (!cst_and_fits_in_hwi (op0))
2141 return;
2143 *offset += int_cst_value (op0);
2144 *expr = op1;
2148 /* Returns cost of addition in MODE. */
2150 static unsigned
2151 add_cost (enum machine_mode mode)
2153 static unsigned costs[NUM_MACHINE_MODES];
2154 rtx seq;
2155 unsigned cost;
2157 if (costs[mode])
2158 return costs[mode];
2160 start_sequence ();
2161 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2162 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2163 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2164 NULL_RTX);
2165 seq = get_insns ();
2166 end_sequence ();
2168 cost = seq_cost (seq);
2169 if (!cost)
2170 cost = 1;
2172 costs[mode] = cost;
2174 if (dump_file && (dump_flags & TDF_DETAILS))
2175 fprintf (dump_file, "Addition in %s costs %d\n",
2176 GET_MODE_NAME (mode), cost);
2177 return cost;
2180 /* Entry in a hashtable of already known costs for multiplication. */
2181 struct mbc_entry
2183 HOST_WIDE_INT cst; /* The constant to multiply by. */
2184 enum machine_mode mode; /* In mode. */
2185 unsigned cost; /* The cost. */
2188 /* Counts hash value for the ENTRY. */
2190 static hashval_t
2191 mbc_entry_hash (const void *entry)
2193 const struct mbc_entry *e = entry;
2195 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2198 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2200 static int
2201 mbc_entry_eq (const void *entry1, const void *entry2)
2203 const struct mbc_entry *e1 = entry1;
2204 const struct mbc_entry *e2 = entry2;
2206 return (e1->mode == e2->mode
2207 && e1->cst == e2->cst);
2210 /* Returns cost of multiplication by constant CST in MODE. */
2212 static unsigned
2213 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2215 static htab_t costs;
2216 struct mbc_entry **cached, act;
2217 rtx seq;
2218 unsigned cost;
2220 if (!costs)
2221 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2223 act.mode = mode;
2224 act.cst = cst;
2225 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2226 if (*cached)
2227 return (*cached)->cost;
2229 *cached = xmalloc (sizeof (struct mbc_entry));
2230 (*cached)->mode = mode;
2231 (*cached)->cst = cst;
2233 start_sequence ();
2234 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2235 NULL_RTX, 0);
2236 seq = get_insns ();
2237 end_sequence ();
2239 cost = seq_cost (seq);
2241 if (dump_file && (dump_flags & TDF_DETAILS))
2242 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2243 (int) cst, GET_MODE_NAME (mode), cost);
2245 (*cached)->cost = cost;
2247 return cost;
2250 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2251 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2252 variable is omitted. The created memory accesses MODE.
2254 TODO -- there must be some better way. This all is quite crude. */
2256 static unsigned
2257 get_address_cost (bool symbol_present, bool var_present,
2258 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2260 #define MAX_RATIO 128
2261 static sbitmap valid_mult;
2262 static HOST_WIDE_INT rat, off;
2263 static HOST_WIDE_INT min_offset, max_offset;
2264 static unsigned costs[2][2][2][2];
2265 unsigned cost, acost;
2266 rtx seq, addr, base;
2267 bool offset_p, ratio_p;
2268 rtx reg1;
2269 HOST_WIDE_INT s_offset;
2270 unsigned HOST_WIDE_INT mask;
2271 unsigned bits;
2273 if (!valid_mult)
2275 HOST_WIDE_INT i;
2277 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2279 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2280 for (i = 1; i <= 1 << 20; i <<= 1)
2282 XEXP (addr, 1) = GEN_INT (i);
2283 if (!memory_address_p (Pmode, addr))
2284 break;
2286 max_offset = i >> 1;
2287 off = max_offset;
2289 for (i = 1; i <= 1 << 20; i <<= 1)
2291 XEXP (addr, 1) = GEN_INT (-i);
2292 if (!memory_address_p (Pmode, addr))
2293 break;
2295 min_offset = -(i >> 1);
2297 if (dump_file && (dump_flags & TDF_DETAILS))
2299 fprintf (dump_file, "get_address_cost:\n");
2300 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2301 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2304 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2305 sbitmap_zero (valid_mult);
2306 rat = 1;
2307 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2308 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2310 XEXP (addr, 1) = GEN_INT (i);
2311 if (memory_address_p (Pmode, addr))
2313 SET_BIT (valid_mult, i + MAX_RATIO);
2314 rat = i;
2318 if (dump_file && (dump_flags & TDF_DETAILS))
2320 fprintf (dump_file, " allowed multipliers:");
2321 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2322 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2323 fprintf (dump_file, " %d", (int) i);
2324 fprintf (dump_file, "\n");
2325 fprintf (dump_file, "\n");
2329 bits = GET_MODE_BITSIZE (Pmode);
2330 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2331 offset &= mask;
2332 if ((offset >> (bits - 1) & 1))
2333 offset |= ~mask;
2334 s_offset = offset;
2336 cost = 0;
2337 offset_p = (min_offset <= s_offset && s_offset <= max_offset);
2338 ratio_p = (ratio != 1
2339 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2340 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2342 if (ratio != 1 && !ratio_p)
2343 cost += multiply_by_cost (ratio, Pmode);
2345 if (s_offset && !offset_p && !symbol_present)
2347 cost += add_cost (Pmode);
2348 var_present = true;
2351 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2352 if (!acost)
2354 acost = 0;
2356 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2357 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2358 if (ratio_p)
2359 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2361 if (symbol_present)
2363 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2364 if (offset_p)
2365 base = gen_rtx_fmt_e (CONST, Pmode,
2366 gen_rtx_fmt_ee (PLUS, Pmode,
2367 base,
2368 GEN_INT (off)));
2369 if (var_present)
2370 base = gen_rtx_fmt_ee (PLUS, Pmode, reg1, base);
2373 else if (var_present)
2375 base = reg1;
2376 if (offset_p)
2377 base = gen_rtx_fmt_ee (PLUS, Pmode, base, GEN_INT (off));
2379 else if (offset_p)
2380 base = GEN_INT (off);
2381 else
2382 base = NULL_RTX;
2384 if (base)
2385 addr = gen_rtx_fmt_ee (PLUS, Pmode, base, addr);
2387 start_sequence ();
2388 addr = memory_address (Pmode, addr);
2389 seq = get_insns ();
2390 end_sequence ();
2392 acost = seq_cost (seq);
2393 acost += address_cost (addr, Pmode);
2395 if (!acost)
2396 acost = 1;
2397 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2400 return cost + acost;
2403 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2404 the bitmap to that we should store it. */
2406 static struct ivopts_data *fd_ivopts_data;
2407 static tree
2408 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2410 bitmap *depends_on = data;
2411 struct version_info *info;
2413 if (TREE_CODE (*expr_p) != SSA_NAME)
2414 return NULL_TREE;
2415 info = name_info (fd_ivopts_data, *expr_p);
2417 if (!info->inv_id || info->has_nonlin_use)
2418 return NULL_TREE;
2420 if (!*depends_on)
2421 *depends_on = BITMAP_XMALLOC ();
2422 bitmap_set_bit (*depends_on, info->inv_id);
2424 return NULL_TREE;
2427 /* Estimates cost of forcing EXPR into variable. DEPENDS_ON is a set of the
2428 invariants the computation depends on. */
2430 static unsigned
2431 force_var_cost (struct ivopts_data *data,
2432 tree expr, bitmap *depends_on)
2434 static bool costs_initialized = false;
2435 static unsigned integer_cost;
2436 static unsigned symbol_cost;
2437 static unsigned address_cost;
2439 if (!costs_initialized)
2441 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2442 rtx x = gen_rtx_MEM (DECL_MODE (var),
2443 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2444 tree addr;
2445 tree type = build_pointer_type (integer_type_node);
2447 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2448 2000));
2450 SET_DECL_RTL (var, x);
2451 TREE_STATIC (var) = 1;
2452 addr = build1 (ADDR_EXPR, type, var);
2453 symbol_cost = computation_cost (addr) + 1;
2455 address_cost
2456 = computation_cost (build2 (PLUS_EXPR, type,
2457 addr,
2458 build_int_cst_type (type, 2000))) + 1;
2459 if (dump_file && (dump_flags & TDF_DETAILS))
2461 fprintf (dump_file, "force_var_cost:\n");
2462 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2463 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2464 fprintf (dump_file, " address %d\n", (int) address_cost);
2465 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2466 fprintf (dump_file, "\n");
2469 costs_initialized = true;
2472 if (depends_on)
2474 fd_ivopts_data = data;
2475 walk_tree (&expr, find_depends, depends_on, NULL);
2478 if (SSA_VAR_P (expr))
2479 return 0;
2481 if (TREE_INVARIANT (expr))
2483 if (TREE_CODE (expr) == INTEGER_CST)
2484 return integer_cost;
2486 if (TREE_CODE (expr) == ADDR_EXPR)
2488 tree obj = TREE_OPERAND (expr, 0);
2490 if (TREE_CODE (obj) == VAR_DECL
2491 || TREE_CODE (obj) == PARM_DECL
2492 || TREE_CODE (obj) == RESULT_DECL)
2493 return symbol_cost;
2496 return address_cost;
2499 /* Just an arbitrary value, FIXME. */
2500 return target_spill_cost;
2503 /* Peels a single layer of ADDR. If DIFF is not NULL, do it only if the
2504 offset is constant and add the offset to DIFF. */
2506 static tree
2507 peel_address (tree addr, unsigned HOST_WIDE_INT *diff)
2509 tree off, size;
2510 HOST_WIDE_INT bit_offset;
2512 switch (TREE_CODE (addr))
2514 case SSA_NAME:
2515 case INDIRECT_REF:
2516 case BIT_FIELD_REF:
2517 case VAR_DECL:
2518 case PARM_DECL:
2519 case RESULT_DECL:
2520 case STRING_CST:
2521 case REALPART_EXPR:
2522 case IMAGPART_EXPR:
2523 return NULL_TREE;
2525 case COMPONENT_REF:
2526 off = DECL_FIELD_BIT_OFFSET (TREE_OPERAND (addr, 1));
2527 bit_offset = TREE_INT_CST_LOW (off);
2529 gcc_assert ((bit_offset % BITS_PER_UNIT) == 0);
2531 if (diff)
2532 *diff += bit_offset / BITS_PER_UNIT;
2534 return TREE_OPERAND (addr, 0);
2536 case VIEW_CONVERT_EXPR:
2537 return TREE_OPERAND (addr, 0);
2539 case ARRAY_REF:
2540 off = TREE_OPERAND (addr, 1);
2542 if (diff)
2544 if (!cst_and_fits_in_hwi (off))
2545 return NULL_TREE;
2547 size = TYPE_SIZE_UNIT (TREE_TYPE (addr));
2548 if (!cst_and_fits_in_hwi (size))
2549 return NULL_TREE;
2551 *diff += TREE_INT_CST_LOW (off) * TREE_INT_CST_LOW (size);
2554 return TREE_OPERAND (addr, 0);
2556 default:
2557 gcc_unreachable ();
2561 /* Checks whether E1 and E2 have constant difference, and if they do,
2562 store it in *DIFF. */
2564 static bool
2565 ptr_difference_const (tree e1, tree e2, unsigned HOST_WIDE_INT *diff)
2567 int d1 = 0, d2 = 0;
2568 tree x;
2569 unsigned HOST_WIDE_INT delta1 = 0, delta2 = 0;
2571 /* Find depths of E1 and E2. */
2572 for (x = e1; x; x = peel_address (x, NULL))
2573 d1++;
2574 for (x = e2; x; x = peel_address (x, NULL))
2575 d2++;
2577 for (; e1 && d1 > d2; e1 = peel_address (e1, &delta1))
2578 d1--;
2579 for (; e2 && d2 > d1; e2 = peel_address (e2, &delta2))
2580 d2--;
2582 while (e1 && e2 && !operand_equal_p (e1, e2, 0))
2584 e1 = peel_address (e1, &delta1);
2585 e2 = peel_address (e2, &delta1);
2588 if (!e1 || !e2)
2589 return false;
2591 *diff = delta1 - delta2;
2592 return true;
2595 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2596 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2597 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2598 invariants the computation depends on. */
2600 static unsigned
2601 split_address_cost (struct ivopts_data *data,
2602 tree addr, bool *symbol_present, bool *var_present,
2603 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2605 tree core = addr;
2607 while (core
2608 && TREE_CODE (core) != VAR_DECL)
2609 core = peel_address (core, offset);
2611 if (!core)
2613 *symbol_present = false;
2614 *var_present = true;
2615 fd_ivopts_data = data;
2616 walk_tree (&addr, find_depends, depends_on, NULL);
2617 return target_spill_cost;
2620 if (TREE_STATIC (core)
2621 || DECL_EXTERNAL (core))
2623 *symbol_present = true;
2624 *var_present = false;
2625 return 0;
2628 *symbol_present = false;
2629 *var_present = true;
2630 return 0;
2633 /* Estimates cost of expressing difference of addresses E1 - E2 as
2634 var + symbol + offset. The value of offset is added to OFFSET,
2635 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2636 part is missing. DEPENDS_ON is a set of the invariants the computation
2637 depends on. */
2639 static unsigned
2640 ptr_difference_cost (struct ivopts_data *data,
2641 tree e1, tree e2, bool *symbol_present, bool *var_present,
2642 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2644 unsigned HOST_WIDE_INT diff = 0;
2645 unsigned cost;
2647 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
2649 if (TREE_CODE (e2) == ADDR_EXPR
2650 && ptr_difference_const (TREE_OPERAND (e1, 0),
2651 TREE_OPERAND (e2, 0), &diff))
2653 *offset += diff;
2654 *symbol_present = false;
2655 *var_present = false;
2656 return 0;
2659 if (e2 == integer_zero_node)
2660 return split_address_cost (data, TREE_OPERAND (e1, 0),
2661 symbol_present, var_present, offset, depends_on);
2663 *symbol_present = false;
2664 *var_present = true;
2666 cost = force_var_cost (data, e1, depends_on);
2667 cost += force_var_cost (data, e2, depends_on);
2668 cost += add_cost (Pmode);
2670 return cost;
2673 /* Estimates cost of expressing difference E1 - E2 as
2674 var + symbol + offset. The value of offset is added to OFFSET,
2675 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2676 part is missing. DEPENDS_ON is a set of the invariants the computation
2677 depends on. */
2679 static unsigned
2680 difference_cost (struct ivopts_data *data,
2681 tree e1, tree e2, bool *symbol_present, bool *var_present,
2682 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2684 unsigned cost;
2685 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
2687 strip_offset (&e1, offset);
2688 *offset = -*offset;
2689 strip_offset (&e2, offset);
2690 *offset = -*offset;
2692 if (TREE_CODE (e1) == ADDR_EXPR)
2693 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
2694 depends_on);
2695 *symbol_present = false;
2697 if (operand_equal_p (e1, e2, 0))
2699 *var_present = false;
2700 return 0;
2702 *var_present = true;
2703 if (zero_p (e2))
2704 return force_var_cost (data, e1, depends_on);
2706 if (zero_p (e1))
2708 cost = force_var_cost (data, e2, depends_on);
2709 cost += multiply_by_cost (-1, mode);
2711 return cost;
2714 cost = force_var_cost (data, e1, depends_on);
2715 cost += force_var_cost (data, e2, depends_on);
2716 cost += add_cost (mode);
2718 return cost;
2721 /* Determines the cost of the computation by that USE is expressed
2722 from induction variable CAND. If ADDRESS_P is true, we just need
2723 to create an address from it, otherwise we want to get it into
2724 register. A set of invariants we depend on is stored in
2725 DEPENDS_ON. AT is the statement at that the value is computed. */
2727 static unsigned
2728 get_computation_cost_at (struct ivopts_data *data,
2729 struct iv_use *use, struct iv_cand *cand,
2730 bool address_p, bitmap *depends_on, tree at)
2732 tree ubase = use->iv->base, ustep = use->iv->step;
2733 tree cbase, cstep;
2734 tree utype = TREE_TYPE (ubase), ctype;
2735 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
2736 HOST_WIDE_INT ratio, aratio;
2737 bool var_present, symbol_present;
2738 unsigned cost = 0, n_sums;
2740 *depends_on = NULL;
2742 /* Only consider real candidates. */
2743 if (!cand->iv)
2744 return INFTY;
2746 cbase = cand->iv->base;
2747 cstep = cand->iv->step;
2748 ctype = TREE_TYPE (cbase);
2750 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2752 /* We do not have a precision to express the values of use. */
2753 return INFTY;
2756 if (!cst_and_fits_in_hwi (ustep)
2757 || !cst_and_fits_in_hwi (cstep))
2758 return INFTY;
2760 if (TREE_CODE (ubase) == INTEGER_CST
2761 && !cst_and_fits_in_hwi (ubase))
2762 goto fallback;
2764 if (TREE_CODE (cbase) == INTEGER_CST
2765 && !cst_and_fits_in_hwi (cbase))
2766 goto fallback;
2768 ustepi = int_cst_value (ustep);
2769 cstepi = int_cst_value (cstep);
2771 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
2773 /* TODO -- add direct handling of this case. */
2774 goto fallback;
2777 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
2778 return INFTY;
2780 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2781 or ratio == 1, it is better to handle this like
2783 ubase - ratio * cbase + ratio * var
2785 (also holds in the case ratio == -1, TODO. */
2787 if (TREE_CODE (cbase) == INTEGER_CST)
2789 offset = - ratio * int_cst_value (cbase);
2790 cost += difference_cost (data,
2791 ubase, integer_zero_node,
2792 &symbol_present, &var_present, &offset,
2793 depends_on);
2795 else if (ratio == 1)
2797 cost += difference_cost (data,
2798 ubase, cbase,
2799 &symbol_present, &var_present, &offset,
2800 depends_on);
2802 else
2804 cost += force_var_cost (data, cbase, depends_on);
2805 cost += add_cost (TYPE_MODE (ctype));
2806 cost += difference_cost (data,
2807 ubase, integer_zero_node,
2808 &symbol_present, &var_present, &offset,
2809 depends_on);
2812 /* If we are after the increment, the value of the candidate is higher by
2813 one iteration. */
2814 if (stmt_after_increment (data->current_loop, cand, at))
2815 offset -= ratio * cstepi;
2817 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
2818 (symbol/var/const parts may be omitted). If we are looking for an address,
2819 find the cost of addressing this. */
2820 if (address_p)
2821 return get_address_cost (symbol_present, var_present, offset, ratio);
2823 /* Otherwise estimate the costs for computing the expression. */
2824 aratio = ratio > 0 ? ratio : -ratio;
2825 if (!symbol_present && !var_present && !offset)
2827 if (ratio != 1)
2828 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
2830 return cost;
2833 if (aratio != 1)
2834 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
2836 n_sums = 1;
2837 if (var_present
2838 /* Symbol + offset should be compile-time computable. */
2839 && (symbol_present || offset))
2840 n_sums++;
2842 return cost + n_sums * add_cost (TYPE_MODE (ctype));
2844 fallback:
2846 /* Just get the expression, expand it and measure the cost. */
2847 tree comp = get_computation_at (data->current_loop, use, cand, at);
2849 if (!comp)
2850 return INFTY;
2852 if (address_p)
2853 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
2855 return computation_cost (comp);
2859 /* Determines the cost of the computation by that USE is expressed
2860 from induction variable CAND. If ADDRESS_P is true, we just need
2861 to create an address from it, otherwise we want to get it into
2862 register. A set of invariants we depend on is stored in
2863 DEPENDS_ON. */
2865 static unsigned
2866 get_computation_cost (struct ivopts_data *data,
2867 struct iv_use *use, struct iv_cand *cand,
2868 bool address_p, bitmap *depends_on)
2870 return get_computation_cost_at (data,
2871 use, cand, address_p, depends_on, use->stmt);
2874 /* Determines cost of basing replacement of USE on CAND in a generic
2875 expression. */
2877 static void
2878 determine_use_iv_cost_generic (struct ivopts_data *data,
2879 struct iv_use *use, struct iv_cand *cand)
2881 bitmap depends_on;
2882 unsigned cost = get_computation_cost (data, use, cand, false, &depends_on);
2884 set_use_iv_cost (data, use, cand, cost, depends_on);
2887 /* Determines cost of basing replacement of USE on CAND in an address. */
2889 static void
2890 determine_use_iv_cost_address (struct ivopts_data *data,
2891 struct iv_use *use, struct iv_cand *cand)
2893 bitmap depends_on;
2894 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
2896 set_use_iv_cost (data, use, cand, cost, depends_on);
2899 /* Computes value of induction variable IV in iteration NITER. */
2901 static tree
2902 iv_value (struct iv *iv, tree niter)
2904 tree val;
2905 tree type = TREE_TYPE (iv->base);
2907 niter = fold_convert (type, niter);
2908 val = fold (build2 (MULT_EXPR, type, iv->step, unsave_expr_now (niter)));
2910 return fold (build2 (PLUS_EXPR, type, iv->base, val));
2913 /* Computes value of candidate CAND at position AT in iteration NITER. */
2915 static tree
2916 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
2918 tree val = iv_value (cand->iv, niter);
2919 tree type = TREE_TYPE (cand->iv->base);
2921 if (stmt_after_increment (loop, cand, at))
2922 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
2924 return val;
2927 /* Check whether it is possible to express the condition in USE by comparison
2928 of candidate CAND. If so, store the comparison code to COMPARE and the
2929 value compared with to BOUND. */
2931 static bool
2932 may_eliminate_iv (struct loop *loop,
2933 struct iv_use *use, struct iv_cand *cand,
2934 enum tree_code *compare, tree *bound)
2936 edge exit;
2937 struct tree_niter_desc *niter, new_niter;
2938 tree wider_type, type, base;
2940 /* For now just very primitive -- we work just for the single exit condition,
2941 and are quite conservative about the possible overflows. TODO -- both of
2942 these can be improved. */
2943 exit = single_dom_exit (loop);
2944 if (!exit)
2945 return false;
2946 if (use->stmt != last_stmt (exit->src))
2947 return false;
2949 niter = &loop_data (loop)->niter;
2950 if (!niter->niter
2951 || !integer_nonzerop (niter->assumptions)
2952 || !integer_zerop (niter->may_be_zero))
2953 return false;
2955 if (exit->flags & EDGE_TRUE_VALUE)
2956 *compare = EQ_EXPR;
2957 else
2958 *compare = NE_EXPR;
2960 *bound = cand_value_at (loop, cand, use->stmt, niter->niter);
2962 /* Let us check there is not some problem with overflows, by checking that
2963 the number of iterations is unchanged. */
2964 base = cand->iv->base;
2965 type = TREE_TYPE (base);
2966 if (stmt_after_increment (loop, cand, use->stmt))
2967 base = fold (build2 (PLUS_EXPR, type, base, cand->iv->step));
2969 new_niter.niter = NULL_TREE;
2970 number_of_iterations_cond (TREE_TYPE (cand->iv->base), base,
2971 cand->iv->step, NE_EXPR, *bound, NULL_TREE,
2972 &new_niter);
2973 if (!new_niter.niter
2974 || !integer_nonzerop (new_niter.assumptions)
2975 || !integer_zerop (new_niter.may_be_zero))
2976 return false;
2978 wider_type = TREE_TYPE (new_niter.niter);
2979 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (TREE_TYPE (niter->niter)))
2980 wider_type = TREE_TYPE (niter->niter);
2981 if (!operand_equal_p (fold_convert (wider_type, niter->niter),
2982 fold_convert (wider_type, new_niter.niter), 0))
2983 return false;
2985 return true;
2988 /* Determines cost of basing replacement of USE on CAND in a condition. */
2990 static void
2991 determine_use_iv_cost_condition (struct ivopts_data *data,
2992 struct iv_use *use, struct iv_cand *cand)
2994 tree bound;
2995 enum tree_code compare;
2997 /* Only consider real candidates. */
2998 if (!cand->iv)
3000 set_use_iv_cost (data, use, cand, INFTY, NULL);
3001 return;
3004 if (may_eliminate_iv (data->current_loop, use, cand, &compare, &bound))
3006 bitmap depends_on = NULL;
3007 unsigned cost = force_var_cost (data, bound, &depends_on);
3009 set_use_iv_cost (data, use, cand, cost, depends_on);
3010 return;
3013 /* The induction variable elimination failed; just express the original
3014 giv. If it is compared with an invariant, note that we cannot get
3015 rid of it. */
3016 if (TREE_CODE (*use->op_p) == SSA_NAME)
3017 record_invariant (data, *use->op_p, true);
3018 else
3020 record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
3021 record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
3024 determine_use_iv_cost_generic (data, use, cand);
3027 /* Checks whether it is possible to replace the final value of USE by
3028 a direct computation. If so, the formula is stored to *VALUE. */
3030 static bool
3031 may_replace_final_value (struct loop *loop, struct iv_use *use, tree *value)
3033 edge exit;
3034 struct tree_niter_desc *niter;
3036 exit = single_dom_exit (loop);
3037 if (!exit)
3038 return false;
3040 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3041 bb_for_stmt (use->stmt)));
3043 niter = &loop_data (loop)->niter;
3044 if (!niter->niter
3045 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
3046 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
3047 return false;
3049 *value = iv_value (use->iv, niter->niter);
3051 return true;
3054 /* Determines cost of replacing final value of USE using CAND. */
3056 static void
3057 determine_use_iv_cost_outer (struct ivopts_data *data,
3058 struct iv_use *use, struct iv_cand *cand)
3060 bitmap depends_on;
3061 unsigned cost;
3062 edge exit;
3063 tree value;
3064 struct loop *loop = data->current_loop;
3066 if (!cand->iv)
3068 if (!may_replace_final_value (loop, use, &value))
3070 set_use_iv_cost (data, use, cand, INFTY, NULL);
3071 return;
3074 depends_on = NULL;
3075 cost = force_var_cost (data, value, &depends_on);
3077 cost /= AVG_LOOP_NITER (loop);
3079 set_use_iv_cost (data, use, cand, cost, depends_on);
3080 return;
3083 exit = single_dom_exit (loop);
3084 if (exit)
3086 /* If there is just a single exit, we may use value of the candidate
3087 after we take it to determine the value of use. */
3088 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3089 last_stmt (exit->src));
3090 if (cost != INFTY)
3091 cost /= AVG_LOOP_NITER (loop);
3093 else
3095 /* Otherwise we just need to compute the iv. */
3096 cost = get_computation_cost (data, use, cand, false, &depends_on);
3099 set_use_iv_cost (data, use, cand, cost, depends_on);
3102 /* Determines cost of basing replacement of USE on CAND. */
3104 static void
3105 determine_use_iv_cost (struct ivopts_data *data,
3106 struct iv_use *use, struct iv_cand *cand)
3108 switch (use->type)
3110 case USE_NONLINEAR_EXPR:
3111 determine_use_iv_cost_generic (data, use, cand);
3112 break;
3114 case USE_OUTER:
3115 determine_use_iv_cost_outer (data, use, cand);
3116 break;
3118 case USE_ADDRESS:
3119 determine_use_iv_cost_address (data, use, cand);
3120 break;
3122 case USE_COMPARE:
3123 determine_use_iv_cost_condition (data, use, cand);
3124 break;
3126 default:
3127 gcc_unreachable ();
3131 /* Determines costs of basing the use of the iv on an iv candidate. */
3133 static void
3134 determine_use_iv_costs (struct ivopts_data *data)
3136 unsigned i, j;
3137 struct iv_use *use;
3138 struct iv_cand *cand;
3140 data->consider_all_candidates = (n_iv_cands (data)
3141 <= CONSIDER_ALL_CANDIDATES_BOUND);
3143 alloc_use_cost_map (data);
3145 if (!data->consider_all_candidates)
3147 /* Add the important candidate entries. */
3148 for (j = 0; j < n_iv_cands (data); j++)
3150 cand = iv_cand (data, j);
3151 if (!cand->important)
3152 continue;
3153 for (i = 0; i < n_iv_uses (data); i++)
3155 use = iv_use (data, i);
3156 determine_use_iv_cost (data, use, cand);
3161 for (i = 0; i < n_iv_uses (data); i++)
3163 use = iv_use (data, i);
3165 if (data->consider_all_candidates)
3167 for (j = 0; j < n_iv_cands (data); j++)
3169 cand = iv_cand (data, j);
3170 determine_use_iv_cost (data, use, cand);
3173 else
3175 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j,
3177 cand = iv_cand (data, j);
3178 if (!cand->important)
3179 determine_use_iv_cost (data, use, cand);
3184 if (dump_file && (dump_flags & TDF_DETAILS))
3186 fprintf (dump_file, "Use-candidate costs:\n");
3188 for (i = 0; i < n_iv_uses (data); i++)
3190 use = iv_use (data, i);
3192 fprintf (dump_file, "Use %d:\n", i);
3193 fprintf (dump_file, " cand\tcost\tdepends on\n");
3194 for (j = 0; j < use->n_map_members; j++)
3196 if (!use->cost_map[j].cand
3197 || use->cost_map[j].cost == INFTY)
3198 continue;
3200 fprintf (dump_file, " %d\t%d\t",
3201 use->cost_map[j].cand->id,
3202 use->cost_map[j].cost);
3203 if (use->cost_map[j].depends_on)
3204 bitmap_print (dump_file,
3205 use->cost_map[j].depends_on, "","");
3206 fprintf (dump_file, "\n");
3209 fprintf (dump_file, "\n");
3211 fprintf (dump_file, "\n");
3215 /* Determines cost of the candidate CAND. */
3217 static void
3218 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3220 unsigned cost_base, cost_step;
3221 tree base, last;
3222 basic_block bb;
3224 if (!cand->iv)
3226 cand->cost = 0;
3227 return;
3230 /* There are two costs associated with the candidate -- its increment
3231 and its initialization. The second is almost negligible for any loop
3232 that rolls enough, so we take it just very little into account. */
3234 base = cand->iv->base;
3235 cost_base = force_var_cost (data, base, NULL);
3236 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3238 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3240 /* Prefer the original iv unless we may gain something by replacing it. */
3241 if (cand->pos == IP_ORIGINAL)
3242 cand->cost--;
3244 /* Prefer not to insert statements into latch unless there are some
3245 already (so that we do not create unnecessary jumps). */
3246 if (cand->pos == IP_END)
3248 bb = ip_end_pos (data->current_loop);
3249 last = last_stmt (bb);
3251 if (!last
3252 || TREE_CODE (last) == LABEL_EXPR)
3253 cand->cost++;
3257 /* Determines costs of computation of the candidates. */
3259 static void
3260 determine_iv_costs (struct ivopts_data *data)
3262 unsigned i;
3264 if (dump_file && (dump_flags & TDF_DETAILS))
3266 fprintf (dump_file, "Candidate costs:\n");
3267 fprintf (dump_file, " cand\tcost\n");
3270 for (i = 0; i < n_iv_cands (data); i++)
3272 struct iv_cand *cand = iv_cand (data, i);
3274 determine_iv_cost (data, cand);
3276 if (dump_file && (dump_flags & TDF_DETAILS))
3277 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3280 if (dump_file && (dump_flags & TDF_DETAILS))
3281 fprintf (dump_file, "\n");
3284 /* Calculates cost for having SIZE induction variables. */
3286 static unsigned
3287 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3289 return global_cost_for_size (size,
3290 loop_data (data->current_loop)->regs_used,
3291 n_iv_uses (data));
3294 /* For each size of the induction variable set determine the penalty. */
3296 static void
3297 determine_set_costs (struct ivopts_data *data)
3299 unsigned j, n;
3300 tree phi, op;
3301 struct loop *loop = data->current_loop;
3303 /* We use the following model (definitely improvable, especially the
3304 cost function -- TODO):
3306 We estimate the number of registers available (using MD data), name it A.
3308 We estimate the number of registers used by the loop, name it U. This
3309 number is obtained as the number of loop phi nodes (not counting virtual
3310 registers and bivs) + the number of variables from outside of the loop.
3312 We set a reserve R (free regs that are used for temporary computations,
3313 etc.). For now the reserve is a constant 3.
3315 Let I be the number of induction variables.
3317 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3318 make a lot of ivs without a reason).
3319 -- if A - R < U + I <= A, the cost is I * PRES_COST
3320 -- if U + I > A, the cost is I * PRES_COST and
3321 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3323 if (dump_file && (dump_flags & TDF_DETAILS))
3325 fprintf (dump_file, "Global costs:\n");
3326 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3327 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3328 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3329 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3332 n = 0;
3333 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
3335 op = PHI_RESULT (phi);
3337 if (!is_gimple_reg (op))
3338 continue;
3340 if (get_iv (data, op))
3341 continue;
3343 n++;
3346 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j,
3348 struct version_info *info = ver_info (data, j);
3350 if (info->inv_id && info->has_nonlin_use)
3351 n++;
3354 loop_data (loop)->regs_used = n;
3355 if (dump_file && (dump_flags & TDF_DETAILS))
3356 fprintf (dump_file, " regs_used %d\n", n);
3358 if (dump_file && (dump_flags & TDF_DETAILS))
3360 fprintf (dump_file, " cost for size:\n");
3361 fprintf (dump_file, " ivs\tcost\n");
3362 for (j = 0; j <= 2 * target_avail_regs; j++)
3363 fprintf (dump_file, " %d\t%d\n", j,
3364 ivopts_global_cost_for_size (data, j));
3365 fprintf (dump_file, "\n");
3369 /* Finds a best candidate for USE and stores it to CAND. The candidates are
3370 taken from the set SOL and they may depend on invariants in the set INV.
3371 The really used candidate and invariants are noted to USED_IVS and
3372 USED_INV. */
3374 static unsigned
3375 find_best_candidate (struct ivopts_data *data,
3376 struct iv_use *use, bitmap sol, bitmap inv,
3377 bitmap used_ivs, bitmap used_inv, struct iv_cand **cand)
3379 unsigned c, d;
3380 unsigned best_cost = INFTY, cost;
3381 struct iv_cand *cnd = NULL, *acnd;
3382 bitmap depends_on = NULL, asol;
3384 if (data->consider_all_candidates)
3385 asol = sol;
3386 else
3388 asol = BITMAP_XMALLOC ();
3389 bitmap_a_and_b (asol, sol, use->related_cands);
3392 EXECUTE_IF_SET_IN_BITMAP (asol, 0, c,
3394 acnd = iv_cand (data, c);
3395 cost = get_use_iv_cost (data, use, acnd, &depends_on);
3397 if (cost == INFTY)
3398 goto next_cand;
3399 if (cost > best_cost)
3400 goto next_cand;
3401 if (cost == best_cost)
3403 /* Prefer the cheaper iv. */
3404 if (acnd->cost >= cnd->cost)
3405 goto next_cand;
3408 if (depends_on)
3410 EXECUTE_IF_AND_COMPL_IN_BITMAP (depends_on, inv, 0, d,
3411 goto next_cand);
3412 if (used_inv)
3413 bitmap_a_or_b (used_inv, used_inv, depends_on);
3416 cnd = acnd;
3417 best_cost = cost;
3418 next_cand: ;
3421 if (cnd && used_ivs)
3422 bitmap_set_bit (used_ivs, cnd->id);
3424 if (cand)
3425 *cand = cnd;
3427 if (!data->consider_all_candidates)
3428 BITMAP_XFREE (asol);
3430 return best_cost;
3433 /* Returns cost of set of ivs SOL + invariants INV. Removes unnecessary
3434 induction variable candidates and invariants from the sets. Only
3435 uses 0 .. MAX_USE - 1 are taken into account. */
3437 static unsigned
3438 set_cost_up_to (struct ivopts_data *data, bitmap sol, bitmap inv,
3439 unsigned max_use)
3441 unsigned i;
3442 unsigned cost = 0, size = 0, acost;
3443 struct iv_use *use;
3444 struct iv_cand *cand;
3445 bitmap used_ivs = BITMAP_XMALLOC (), used_inv = BITMAP_XMALLOC ();
3447 for (i = 0; i < max_use; i++)
3449 use = iv_use (data, i);
3450 acost = find_best_candidate (data, use, sol, inv,
3451 used_ivs, used_inv, NULL);
3452 if (acost == INFTY)
3454 BITMAP_XFREE (used_ivs);
3455 BITMAP_XFREE (used_inv);
3456 return INFTY;
3458 cost += acost;
3461 EXECUTE_IF_SET_IN_BITMAP (used_ivs, 0, i,
3463 cand = iv_cand (data, i);
3465 /* Do not count the pseudocandidates. */
3466 if (cand->iv)
3467 size++;
3469 cost += cand->cost;
3471 EXECUTE_IF_SET_IN_BITMAP (used_inv, 0, i, size++);
3472 cost += ivopts_global_cost_for_size (data, size);
3474 bitmap_copy (sol, used_ivs);
3475 bitmap_copy (inv, used_inv);
3477 BITMAP_XFREE (used_ivs);
3478 BITMAP_XFREE (used_inv);
3480 return cost;
3483 /* Computes cost of set of ivs SOL + invariants INV. Removes unnecessary
3484 induction variable candidates and invariants from the sets. */
3486 static unsigned
3487 set_cost (struct ivopts_data *data, bitmap sol, bitmap inv)
3489 return set_cost_up_to (data, sol, inv, n_iv_uses (data));
3492 /* Tries to extend the sets IVS and INV in the best possible way in order
3493 to express the USE. */
3495 static bool
3496 try_add_cand_for (struct ivopts_data *data, bitmap ivs, bitmap inv,
3497 struct iv_use *use)
3499 unsigned best_cost = set_cost_up_to (data, ivs, inv, use->id + 1), act_cost;
3500 bitmap best_ivs = BITMAP_XMALLOC ();
3501 bitmap best_inv = BITMAP_XMALLOC ();
3502 bitmap act_ivs = BITMAP_XMALLOC ();
3503 bitmap act_inv = BITMAP_XMALLOC ();
3504 unsigned i;
3505 struct cost_pair *cp;
3507 bitmap_copy (best_ivs, ivs);
3508 bitmap_copy (best_inv, inv);
3510 for (i = 0; i < use->n_map_members; i++)
3512 cp = use->cost_map + i;
3513 if (cp->cost == INFTY)
3514 continue;
3516 bitmap_copy (act_ivs, ivs);
3517 bitmap_set_bit (act_ivs, cp->cand->id);
3518 if (cp->depends_on)
3519 bitmap_a_or_b (act_inv, inv, cp->depends_on);
3520 else
3521 bitmap_copy (act_inv, inv);
3522 act_cost = set_cost_up_to (data, act_ivs, act_inv, use->id + 1);
3524 if (act_cost < best_cost)
3526 best_cost = act_cost;
3527 bitmap_copy (best_ivs, act_ivs);
3528 bitmap_copy (best_inv, act_inv);
3532 bitmap_copy (ivs, best_ivs);
3533 bitmap_copy (inv, best_inv);
3535 BITMAP_XFREE (best_ivs);
3536 BITMAP_XFREE (best_inv);
3537 BITMAP_XFREE (act_ivs);
3538 BITMAP_XFREE (act_inv);
3540 return (best_cost != INFTY);
3543 /* Finds an initial set of IVS and invariants INV. We do this by simply
3544 choosing the best candidate for each use. */
3546 static unsigned
3547 get_initial_solution (struct ivopts_data *data, bitmap ivs, bitmap inv)
3549 unsigned i;
3551 for (i = 0; i < n_iv_uses (data); i++)
3552 if (!try_add_cand_for (data, ivs, inv, iv_use (data, i)))
3553 return INFTY;
3555 return set_cost (data, ivs, inv);
3558 /* Tries to improve set of induction variables IVS and invariants INV to get
3559 it better than COST. */
3561 static bool
3562 try_improve_iv_set (struct ivopts_data *data,
3563 bitmap ivs, bitmap inv, unsigned *cost)
3565 unsigned i, acost;
3566 bitmap new_ivs = BITMAP_XMALLOC (), new_inv = BITMAP_XMALLOC ();
3567 bitmap best_new_ivs = NULL, best_new_inv = NULL;
3569 /* Try altering the set of induction variables by one. */
3570 for (i = 0; i < n_iv_cands (data); i++)
3572 bitmap_copy (new_ivs, ivs);
3573 bitmap_copy (new_inv, inv);
3575 if (bitmap_bit_p (ivs, i))
3576 bitmap_clear_bit (new_ivs, i);
3577 else
3578 bitmap_set_bit (new_ivs, i);
3580 acost = set_cost (data, new_ivs, new_inv);
3581 if (acost >= *cost)
3582 continue;
3584 if (!best_new_ivs)
3586 best_new_ivs = BITMAP_XMALLOC ();
3587 best_new_inv = BITMAP_XMALLOC ();
3590 *cost = acost;
3591 bitmap_copy (best_new_ivs, new_ivs);
3592 bitmap_copy (best_new_inv, new_inv);
3595 /* Ditto for invariants. */
3596 for (i = 1; i <= data->max_inv_id; i++)
3598 if (ver_info (data, i)->has_nonlin_use)
3599 continue;
3601 bitmap_copy (new_ivs, ivs);
3602 bitmap_copy (new_inv, inv);
3604 if (bitmap_bit_p (inv, i))
3605 bitmap_clear_bit (new_inv, i);
3606 else
3607 bitmap_set_bit (new_inv, i);
3609 acost = set_cost (data, new_ivs, new_inv);
3610 if (acost >= *cost)
3611 continue;
3613 if (!best_new_ivs)
3615 best_new_ivs = BITMAP_XMALLOC ();
3616 best_new_inv = BITMAP_XMALLOC ();
3619 *cost = acost;
3620 bitmap_copy (best_new_ivs, new_ivs);
3621 bitmap_copy (best_new_inv, new_inv);
3624 BITMAP_XFREE (new_ivs);
3625 BITMAP_XFREE (new_inv);
3627 if (!best_new_ivs)
3628 return false;
3630 bitmap_copy (ivs, best_new_ivs);
3631 bitmap_copy (inv, best_new_inv);
3632 BITMAP_XFREE (best_new_ivs);
3633 BITMAP_XFREE (best_new_inv);
3634 return true;
3637 /* Attempts to find the optimal set of induction variables. We do simple
3638 greedy heuristic -- we try to replace at most one candidate in the selected
3639 solution and remove the unused ivs while this improves the cost. */
3641 static bitmap
3642 find_optimal_iv_set (struct ivopts_data *data)
3644 unsigned cost, i;
3645 bitmap set = BITMAP_XMALLOC ();
3646 bitmap inv = BITMAP_XMALLOC ();
3647 struct iv_use *use;
3649 /* Set the upper bound. */
3650 cost = get_initial_solution (data, set, inv);
3651 if (cost == INFTY)
3653 if (dump_file && (dump_flags & TDF_DETAILS))
3654 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
3655 BITMAP_XFREE (inv);
3656 BITMAP_XFREE (set);
3657 return NULL;
3660 if (dump_file && (dump_flags & TDF_DETAILS))
3662 fprintf (dump_file, "Initial set of candidates (cost %d): ", cost);
3663 bitmap_print (dump_file, set, "", "");
3664 fprintf (dump_file, " invariants ");
3665 bitmap_print (dump_file, inv, "", "");
3666 fprintf (dump_file, "\n");
3669 while (try_improve_iv_set (data, set, inv, &cost))
3671 if (dump_file && (dump_flags & TDF_DETAILS))
3673 fprintf (dump_file, "Improved to (cost %d): ", cost);
3674 bitmap_print (dump_file, set, "", "");
3675 fprintf (dump_file, " invariants ");
3676 bitmap_print (dump_file, inv, "", "");
3677 fprintf (dump_file, "\n");
3681 if (dump_file && (dump_flags & TDF_DETAILS))
3682 fprintf (dump_file, "Final cost %d\n\n", cost);
3684 for (i = 0; i < n_iv_uses (data); i++)
3686 use = iv_use (data, i);
3687 find_best_candidate (data, use, set, inv, NULL, NULL, &use->selected);
3690 BITMAP_XFREE (inv);
3692 return set;
3695 /* Creates a new induction variable corresponding to CAND. */
3697 static void
3698 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
3700 block_stmt_iterator incr_pos;
3701 tree base;
3702 bool after = false;
3704 if (!cand->iv)
3705 return;
3707 switch (cand->pos)
3709 case IP_NORMAL:
3710 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
3711 break;
3713 case IP_END:
3714 incr_pos = bsi_last (ip_end_pos (data->current_loop));
3715 after = true;
3716 break;
3718 case IP_ORIGINAL:
3719 /* Mark that the iv is preserved. */
3720 name_info (data, cand->var_before)->preserve_biv = true;
3721 name_info (data, cand->var_after)->preserve_biv = true;
3723 /* Rewrite the increment so that it uses var_before directly. */
3724 find_interesting_uses_op (data, cand->var_after)->selected = cand;
3726 return;
3729 gimple_add_tmp_var (cand->var_before);
3730 add_referenced_tmp_var (cand->var_before);
3732 base = unshare_expr (cand->iv->base);
3734 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
3735 &incr_pos, after, &cand->var_before, &cand->var_after);
3738 /* Creates new induction variables described in SET. */
3740 static void
3741 create_new_ivs (struct ivopts_data *data, bitmap set)
3743 unsigned i;
3744 struct iv_cand *cand;
3746 EXECUTE_IF_SET_IN_BITMAP (set, 0, i,
3748 cand = iv_cand (data, i);
3749 create_new_iv (data, cand);
3753 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
3754 is true, remove also the ssa name defined by the statement. */
3756 static void
3757 remove_statement (tree stmt, bool including_defined_name)
3759 if (TREE_CODE (stmt) == PHI_NODE)
3761 if (!including_defined_name)
3763 /* Prevent the ssa name defined by the statement from being removed. */
3764 SET_PHI_RESULT (stmt, NULL);
3766 remove_phi_node (stmt, NULL_TREE, bb_for_stmt (stmt));
3768 else
3770 block_stmt_iterator bsi = stmt_for_bsi (stmt);
3772 bsi_remove (&bsi);
3776 /* Rewrites USE (definition of iv used in a nonlinear expression)
3777 using candidate CAND. */
3779 static void
3780 rewrite_use_nonlinear_expr (struct ivopts_data *data,
3781 struct iv_use *use, struct iv_cand *cand)
3783 tree comp = unshare_expr (get_computation (data->current_loop,
3784 use, cand));
3785 tree op, stmts, tgt, ass;
3786 block_stmt_iterator bsi, pbsi;
3788 switch (TREE_CODE (use->stmt))
3790 case PHI_NODE:
3791 tgt = PHI_RESULT (use->stmt);
3793 /* If we should keep the biv, do not replace it. */
3794 if (name_info (data, tgt)->preserve_biv)
3795 return;
3797 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
3798 while (!bsi_end_p (pbsi)
3799 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
3801 bsi = pbsi;
3802 bsi_next (&pbsi);
3804 break;
3806 case MODIFY_EXPR:
3807 tgt = TREE_OPERAND (use->stmt, 0);
3808 bsi = stmt_for_bsi (use->stmt);
3809 break;
3811 default:
3812 gcc_unreachable ();
3815 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
3817 if (TREE_CODE (use->stmt) == PHI_NODE)
3819 if (stmts)
3820 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
3821 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
3822 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
3823 remove_statement (use->stmt, false);
3824 SSA_NAME_DEF_STMT (tgt) = ass;
3826 else
3828 if (stmts)
3829 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3830 TREE_OPERAND (use->stmt, 1) = op;
3834 /* Replaces ssa name in index IDX by its basic variable. Callback for
3835 for_each_index. */
3837 static bool
3838 idx_remove_ssa_names (tree base ATTRIBUTE_UNUSED, tree *idx,
3839 void *data ATTRIBUTE_UNUSED)
3841 if (TREE_CODE (*idx) == SSA_NAME)
3842 *idx = SSA_NAME_VAR (*idx);
3843 return true;
3846 /* Unshares REF and replaces ssa names inside it by their basic variables. */
3848 static tree
3849 unshare_and_remove_ssa_names (tree ref)
3851 ref = unshare_expr (ref);
3852 for_each_index (&ref, idx_remove_ssa_names, NULL);
3854 return ref;
3857 /* Rewrites base of memory access OP with expression WITH in statement
3858 pointed to by BSI. */
3860 static void
3861 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
3863 tree var = get_base_address (*op), new_var, new_name, copy, name;
3864 tree orig;
3866 if (!var || TREE_CODE (with) != SSA_NAME)
3867 goto do_rewrite;
3869 if (TREE_CODE (var) == INDIRECT_REF)
3870 var = TREE_OPERAND (var, 0);
3871 if (TREE_CODE (var) == SSA_NAME)
3873 name = var;
3874 var = SSA_NAME_VAR (var);
3876 else if (DECL_P (var))
3877 name = NULL_TREE;
3878 else
3879 goto do_rewrite;
3881 if (var_ann (var)->type_mem_tag)
3882 var = var_ann (var)->type_mem_tag;
3884 /* We need to add a memory tag for the variable. But we do not want
3885 to add it to the temporary used for the computations, since this leads
3886 to problems in redundancy elimination when there are common parts
3887 in two computations referring to the different arrays. So we copy
3888 the variable to a new temporary. */
3889 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
3890 if (name)
3891 new_name = duplicate_ssa_name (name, copy);
3892 else
3894 new_var = create_tmp_var (TREE_TYPE (with), "ruatmp");
3895 add_referenced_tmp_var (new_var);
3896 var_ann (new_var)->type_mem_tag = var;
3897 new_name = make_ssa_name (new_var, copy);
3899 TREE_OPERAND (copy, 0) = new_name;
3900 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
3901 with = new_name;
3903 do_rewrite:
3905 orig = NULL_TREE;
3906 if (TREE_CODE (*op) == INDIRECT_REF)
3907 orig = REF_ORIGINAL (*op);
3908 if (!orig)
3909 orig = unshare_and_remove_ssa_names (*op);
3911 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
3912 /* Record the original reference, for purposes of alias analysis. */
3913 REF_ORIGINAL (*op) = orig;
3916 /* Rewrites USE (address that is an iv) using candidate CAND. */
3918 static void
3919 rewrite_use_address (struct ivopts_data *data,
3920 struct iv_use *use, struct iv_cand *cand)
3922 tree comp = unshare_expr (get_computation (data->current_loop,
3923 use, cand));
3924 block_stmt_iterator bsi = stmt_for_bsi (use->stmt);
3925 tree stmts;
3926 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
3928 if (stmts)
3929 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3931 rewrite_address_base (&bsi, use->op_p, op);
3934 /* Rewrites USE (the condition such that one of the arguments is an iv) using
3935 candidate CAND. */
3937 static void
3938 rewrite_use_compare (struct ivopts_data *data,
3939 struct iv_use *use, struct iv_cand *cand)
3941 tree comp;
3942 tree *op_p, cond, op, stmts, bound;
3943 block_stmt_iterator bsi = stmt_for_bsi (use->stmt);
3944 enum tree_code compare;
3946 if (may_eliminate_iv (data->current_loop,
3947 use, cand, &compare, &bound))
3949 op = force_gimple_operand (unshare_expr (bound), &stmts,
3950 true, NULL_TREE);
3952 if (stmts)
3953 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3955 *use->op_p = build2 (compare, boolean_type_node,
3956 var_at_stmt (data->current_loop,
3957 cand, use->stmt), op);
3958 modify_stmt (use->stmt);
3959 return;
3962 /* The induction variable elimination failed; just express the original
3963 giv. */
3964 comp = unshare_expr (get_computation (data->current_loop, use, cand));
3966 cond = *use->op_p;
3967 op_p = &TREE_OPERAND (cond, 0);
3968 if (TREE_CODE (*op_p) != SSA_NAME
3969 || zero_p (get_iv (data, *op_p)->step))
3970 op_p = &TREE_OPERAND (cond, 1);
3972 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
3973 if (stmts)
3974 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3976 *op_p = op;
3979 /* Ensure that operand *OP_P may be used at the end of EXIT without
3980 violating loop closed ssa form. */
3982 static void
3983 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
3985 basic_block def_bb;
3986 struct loop *def_loop;
3987 tree phi, use;
3989 use = USE_FROM_PTR (op_p);
3990 if (TREE_CODE (use) != SSA_NAME)
3991 return;
3993 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
3994 if (!def_bb)
3995 return;
3997 def_loop = def_bb->loop_father;
3998 if (flow_bb_inside_loop_p (def_loop, exit->dest))
3999 return;
4001 /* Try finding a phi node that copies the value out of the loop. */
4002 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
4003 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
4004 break;
4006 if (!phi)
4008 /* Create such a phi node. */
4009 tree new_name = duplicate_ssa_name (use, NULL);
4011 phi = create_phi_node (new_name, exit->dest);
4012 SSA_NAME_DEF_STMT (new_name) = phi;
4013 add_phi_arg (&phi, use, exit);
4016 SET_USE (op_p, PHI_RESULT (phi));
4019 /* Ensure that operands of STMT may be used at the end of EXIT without
4020 violating loop closed ssa form. */
4022 static void
4023 protect_loop_closed_ssa_form (edge exit, tree stmt)
4025 use_optype uses;
4026 vuse_optype vuses;
4027 v_may_def_optype v_may_defs;
4028 unsigned i;
4030 get_stmt_operands (stmt);
4032 uses = STMT_USE_OPS (stmt);
4033 for (i = 0; i < NUM_USES (uses); i++)
4034 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4036 vuses = STMT_VUSE_OPS (stmt);
4037 for (i = 0; i < NUM_VUSES (vuses); i++)
4038 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
4040 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4041 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
4042 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
4045 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
4046 so that they are emitted on the correct place, and so that the loop closed
4047 ssa form is preserved. */
4049 static void
4050 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
4052 tree_stmt_iterator tsi;
4053 block_stmt_iterator bsi;
4054 tree phi, stmt, def, next;
4056 if (exit->dest->pred->pred_next)
4057 split_loop_exit_edge (exit);
4059 if (TREE_CODE (stmts) == STATEMENT_LIST)
4061 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
4062 protect_loop_closed_ssa_form (exit, tsi_stmt (tsi));
4064 else
4065 protect_loop_closed_ssa_form (exit, stmts);
4067 /* Ensure there is label in exit->dest, so that we can
4068 insert after it. */
4069 tree_block_label (exit->dest);
4070 bsi = bsi_after_labels (exit->dest);
4071 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4073 if (!op)
4074 return;
4076 for (phi = phi_nodes (exit->dest); phi; phi = next)
4078 next = TREE_CHAIN (phi);
4080 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
4082 def = PHI_RESULT (phi);
4083 remove_statement (phi, false);
4084 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
4085 def, op);
4086 SSA_NAME_DEF_STMT (def) = stmt;
4087 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
4092 /* Rewrites the final value of USE (that is only needed outside of the loop)
4093 using candidate CAND. */
4095 static void
4096 rewrite_use_outer (struct ivopts_data *data,
4097 struct iv_use *use, struct iv_cand *cand)
4099 edge exit;
4100 tree value, op, stmts, tgt;
4101 tree phi;
4103 switch (TREE_CODE (use->stmt))
4105 case PHI_NODE:
4106 tgt = PHI_RESULT (use->stmt);
4107 break;
4108 case MODIFY_EXPR:
4109 tgt = TREE_OPERAND (use->stmt, 0);
4110 break;
4111 default:
4112 gcc_unreachable ();
4115 exit = single_dom_exit (data->current_loop);
4117 if (exit)
4119 if (!cand->iv)
4121 bool ok = may_replace_final_value (data->current_loop, use, &value);
4122 gcc_assert (ok);
4124 else
4125 value = get_computation_at (data->current_loop,
4126 use, cand, last_stmt (exit->src));
4128 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
4130 /* If we will preserve the iv anyway and we would need to perform
4131 some computation to replace the final value, do nothing. */
4132 if (stmts && name_info (data, tgt)->preserve_biv)
4133 return;
4135 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
4137 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
4139 if (USE_FROM_PTR (use_p) == tgt)
4140 SET_USE (use_p, op);
4143 if (stmts)
4144 compute_phi_arg_on_exit (exit, stmts, op);
4146 /* Enable removal of the statement. We cannot remove it directly,
4147 since we may still need the aliasing information attached to the
4148 ssa name defined by it. */
4149 name_info (data, tgt)->iv->have_use_for = false;
4150 return;
4153 /* If the variable is going to be preserved anyway, there is nothing to
4154 do. */
4155 if (name_info (data, tgt)->preserve_biv)
4156 return;
4158 /* Otherwise we just need to compute the iv. */
4159 rewrite_use_nonlinear_expr (data, use, cand);
4162 /* Rewrites USE using candidate CAND. */
4164 static void
4165 rewrite_use (struct ivopts_data *data,
4166 struct iv_use *use, struct iv_cand *cand)
4168 switch (use->type)
4170 case USE_NONLINEAR_EXPR:
4171 rewrite_use_nonlinear_expr (data, use, cand);
4172 break;
4174 case USE_OUTER:
4175 rewrite_use_outer (data, use, cand);
4176 break;
4178 case USE_ADDRESS:
4179 rewrite_use_address (data, use, cand);
4180 break;
4182 case USE_COMPARE:
4183 rewrite_use_compare (data, use, cand);
4184 break;
4186 default:
4187 gcc_unreachable ();
4189 modify_stmt (use->stmt);
4192 /* Rewrite the uses using the selected induction variables. */
4194 static void
4195 rewrite_uses (struct ivopts_data *data)
4197 unsigned i;
4198 struct iv_cand *cand;
4199 struct iv_use *use;
4201 for (i = 0; i < n_iv_uses (data); i++)
4203 use = iv_use (data, i);
4204 cand = use->selected;
4205 gcc_assert (cand);
4207 rewrite_use (data, use, cand);
4211 /* Removes the ivs that are not used after rewriting. */
4213 static void
4214 remove_unused_ivs (struct ivopts_data *data)
4216 unsigned j;
4218 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j,
4220 struct version_info *info;
4222 info = ver_info (data, j);
4223 if (info->iv
4224 && !zero_p (info->iv->step)
4225 && !info->inv_id
4226 && !info->iv->have_use_for
4227 && !info->preserve_biv)
4228 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
4232 /* Frees data allocated by the optimization of a single loop. */
4234 static void
4235 free_loop_data (struct ivopts_data *data)
4237 unsigned i, j;
4239 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
4241 struct version_info *info;
4243 info = ver_info (data, i);
4244 if (info->iv)
4245 free (info->iv);
4246 info->iv = NULL;
4247 info->has_nonlin_use = false;
4248 info->preserve_biv = false;
4249 info->inv_id = 0;
4251 bitmap_clear (data->relevant);
4253 for (i = 0; i < n_iv_uses (data); i++)
4255 struct iv_use *use = iv_use (data, i);
4257 free (use->iv);
4258 BITMAP_XFREE (use->related_cands);
4259 for (j = 0; j < use->n_map_members; j++)
4260 if (use->cost_map[j].depends_on)
4261 BITMAP_XFREE (use->cost_map[j].depends_on);
4262 free (use->cost_map);
4263 free (use);
4265 VARRAY_POP_ALL (data->iv_uses);
4267 for (i = 0; i < n_iv_cands (data); i++)
4269 struct iv_cand *cand = iv_cand (data, i);
4271 if (cand->iv)
4272 free (cand->iv);
4273 free (cand);
4275 VARRAY_POP_ALL (data->iv_candidates);
4277 if (data->version_info_size < num_ssa_names)
4279 data->version_info_size = 2 * num_ssa_names;
4280 free (data->version_info);
4281 data->version_info = xcalloc (data->version_info_size,
4282 sizeof (struct version_info));
4285 data->max_inv_id = 0;
4287 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
4289 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
4291 SET_DECL_RTL (obj, NULL_RTX);
4293 VARRAY_POP_ALL (decl_rtl_to_reset);
4296 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
4297 loop tree. */
4299 static void
4300 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
4302 unsigned i;
4304 for (i = 1; i < loops->num; i++)
4305 if (loops->parray[i])
4307 free (loops->parray[i]->aux);
4308 loops->parray[i]->aux = NULL;
4311 free_loop_data (data);
4312 free (data->version_info);
4313 BITMAP_XFREE (data->relevant);
4315 VARRAY_FREE (decl_rtl_to_reset);
4316 VARRAY_FREE (data->iv_uses);
4317 VARRAY_FREE (data->iv_candidates);
4320 /* Optimizes the LOOP. Returns true if anything changed. */
4322 static bool
4323 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
4325 bool changed = false;
4326 bitmap iv_set;
4327 edge exit;
4329 data->current_loop = loop;
4331 if (dump_file && (dump_flags & TDF_DETAILS))
4333 fprintf (dump_file, "Processing loop %d\n", loop->num);
4335 exit = single_dom_exit (loop);
4336 if (exit)
4338 fprintf (dump_file, " single exit %d -> %d, exit condition ",
4339 exit->src->index, exit->dest->index);
4340 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
4341 fprintf (dump_file, "\n");
4344 fprintf (dump_file, "\n");
4347 /* For each ssa name determines whether it behaves as an induction variable
4348 in some loop. */
4349 if (!find_induction_variables (data))
4350 goto finish;
4352 /* Finds interesting uses (item 1). */
4353 find_interesting_uses (data);
4354 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
4355 goto finish;
4357 /* Finds candidates for the induction variables (item 2). */
4358 find_iv_candidates (data);
4360 /* Calculates the costs (item 3, part 1). */
4361 determine_use_iv_costs (data);
4362 determine_iv_costs (data);
4363 determine_set_costs (data);
4365 /* Find the optimal set of induction variables (item 3, part 2). */
4366 iv_set = find_optimal_iv_set (data);
4367 if (!iv_set)
4368 goto finish;
4369 changed = true;
4371 /* Create the new induction variables (item 4, part 1). */
4372 create_new_ivs (data, iv_set);
4374 /* Rewrite the uses (item 4, part 2). */
4375 rewrite_uses (data);
4377 /* Remove the ivs that are unused after rewriting. */
4378 remove_unused_ivs (data);
4380 loop_commit_inserts ();
4382 BITMAP_XFREE (iv_set);
4384 /* We have changed the structure of induction variables; it might happen
4385 that definitions in the scev database refer to some of them that were
4386 eliminated. */
4387 scev_reset ();
4389 finish:
4390 free_loop_data (data);
4392 return changed;
4395 /* Main entry point. Optimizes induction variables in LOOPS. */
4397 void
4398 tree_ssa_iv_optimize (struct loops *loops)
4400 struct loop *loop;
4401 struct ivopts_data data;
4403 tree_ssa_iv_optimize_init (loops, &data);
4405 /* Optimize the loops starting with the innermost ones. */
4406 loop = loops->tree_root;
4407 while (loop->inner)
4408 loop = loop->inner;
4410 #ifdef ENABLE_CHECKING
4411 verify_loop_closed_ssa ();
4412 verify_stmts ();
4413 #endif
4415 /* Scan the loops, inner ones first. */
4416 while (loop != loops->tree_root)
4418 if (dump_file && (dump_flags & TDF_DETAILS))
4419 flow_loop_dump (loop, dump_file, NULL, 1);
4420 if (tree_ssa_iv_optimize_loop (&data, loop))
4422 #ifdef ENABLE_CHECKING
4423 verify_loop_closed_ssa ();
4424 verify_stmts ();
4425 #endif
4428 if (loop->next)
4430 loop = loop->next;
4431 while (loop->inner)
4432 loop = loop->inner;
4434 else
4435 loop = loop->outer;
4438 tree_ssa_iv_optimize_finalize (loops, &data);