2004-12-01 David Edelsohn <edelsohn@gnu.org>
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
bloba08923b9636081ef3cb3a99de61d3e48e8ff927d
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
92 /* The infinite cost. */
93 #define INFTY 10000000
95 /* The expected number of loop iterations. TODO -- use profiling instead of
96 this. */
97 #define AVG_LOOP_NITER(LOOP) 5
100 /* Representation of the induction variable. */
101 struct iv
103 tree base; /* Initial value of the iv. */
104 tree base_object; /* A memory object to that the induction variable points. */
105 tree step; /* Step of the iv (constant only). */
106 tree ssa_name; /* The ssa name with the value. */
107 bool biv_p; /* Is it a biv? */
108 bool have_use_for; /* Do we already have a use for it? */
109 unsigned use_id; /* The identifier in the use if it is the case. */
112 /* Per-ssa version information (induction variable descriptions, etc.). */
113 struct version_info
115 tree name; /* The ssa name. */
116 struct iv *iv; /* Induction variable description. */
117 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
118 an expression that is not an induction variable. */
119 unsigned inv_id; /* Id of an invariant. */
120 bool preserve_biv; /* For the original biv, whether to preserve it. */
123 /* Information attached to loop. */
124 struct loop_data
126 struct tree_niter_desc niter;
127 /* Number of iterations. */
129 unsigned regs_used; /* Number of registers used. */
132 /* Types of uses. */
133 enum use_type
135 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
136 USE_OUTER, /* The induction variable is used outside the loop. */
137 USE_ADDRESS, /* Use in an address. */
138 USE_COMPARE /* Use is a compare. */
141 /* The candidate - cost pair. */
142 struct cost_pair
144 struct iv_cand *cand; /* The candidate. */
145 unsigned cost; /* The cost. */
146 bitmap depends_on; /* The list of invariants that have to be
147 preserved. */
150 /* Use. */
151 struct iv_use
153 unsigned id; /* The id of the use. */
154 enum use_type type; /* Type of the use. */
155 struct iv *iv; /* The induction variable it is based on. */
156 tree stmt; /* Statement in that it occurs. */
157 tree *op_p; /* The place where it occurs. */
158 bitmap related_cands; /* The set of "related" iv candidates, plus the common
159 important ones. */
161 unsigned n_map_members; /* Number of candidates in the cost_map list. */
162 struct cost_pair *cost_map;
163 /* The costs wrto the iv candidates. */
165 struct iv_cand *selected;
166 /* The selected candidate. */
169 /* The position where the iv is computed. */
170 enum iv_position
172 IP_NORMAL, /* At the end, just before the exit condition. */
173 IP_END, /* At the end of the latch block. */
174 IP_ORIGINAL /* The original biv. */
177 /* The induction variable candidate. */
178 struct iv_cand
180 unsigned id; /* The number of the candidate. */
181 bool important; /* Whether this is an "important" candidate, i.e. such
182 that it should be considered by all uses. */
183 enum iv_position pos; /* Where it is computed. */
184 tree incremented_at; /* For original biv, the statement where it is
185 incremented. */
186 tree var_before; /* The variable used for it before increment. */
187 tree var_after; /* The variable used for it after increment. */
188 struct iv *iv; /* The value of the candidate. NULL for
189 "pseudocandidate" used to indicate the possibility
190 to replace the final value of an iv by direct
191 computation of the value. */
192 unsigned cost; /* Cost of the candidate. */
195 /* The data used by the induction variable optimizations. */
197 struct ivopts_data
199 /* The currently optimized loop. */
200 struct loop *current_loop;
202 /* The size of version_info array allocated. */
203 unsigned version_info_size;
205 /* The array of information for the ssa names. */
206 struct version_info *version_info;
208 /* The bitmap of indices in version_info whose value was changed. */
209 bitmap relevant;
211 /* The maximum invariant id. */
212 unsigned max_inv_id;
214 /* The uses of induction variables. */
215 varray_type iv_uses;
217 /* The candidates. */
218 varray_type iv_candidates;
220 /* A bitmap of important candidates. */
221 bitmap important_candidates;
223 /* Whether to consider just related and important candidates when replacing a
224 use. */
225 bool consider_all_candidates;
228 /* An assignment of iv candidates to uses. */
230 struct iv_ca
232 /* The number of uses covered by the assignment. */
233 unsigned upto;
235 /* Number of uses that cannot be expressed by the candidates in the set. */
236 unsigned bad_uses;
238 /* Candidate assigned to a use, together with the related costs. */
239 struct cost_pair **cand_for_use;
241 /* Number of times each candidate is used. */
242 unsigned *n_cand_uses;
244 /* The candidates used. */
245 bitmap cands;
247 /* Total number of registers needed. */
248 unsigned n_regs;
250 /* Total cost of expressing uses. */
251 unsigned cand_use_cost;
253 /* Total cost of candidates. */
254 unsigned cand_cost;
256 /* Number of times each invariant is used. */
257 unsigned *n_invariant_uses;
259 /* Total cost of the assignment. */
260 unsigned cost;
263 /* Difference of two iv candidate assignments. */
265 struct iv_ca_delta
267 /* Changed use. */
268 struct iv_use *use;
270 /* An old assignment (for rollback purposes). */
271 struct cost_pair *old_cp;
273 /* A new assignment. */
274 struct cost_pair *new_cp;
276 /* Next change in the list. */
277 struct iv_ca_delta *next_change;
280 /* Bound on number of candidates below that all candidates are considered. */
282 #define CONSIDER_ALL_CANDIDATES_BOUND \
283 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
285 /* If there are more iv occurrences, we just give up (it is quite unlikely that
286 optimizing such a loop would help, and it would take ages). */
288 #define MAX_CONSIDERED_USES \
289 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
291 /* The list of trees for that the decl_rtl field must be reset is stored
292 here. */
294 static varray_type decl_rtl_to_reset;
296 /* Number of uses recorded in DATA. */
298 static inline unsigned
299 n_iv_uses (struct ivopts_data *data)
301 return VARRAY_ACTIVE_SIZE (data->iv_uses);
304 /* Ith use recorded in DATA. */
306 static inline struct iv_use *
307 iv_use (struct ivopts_data *data, unsigned i)
309 return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
312 /* Number of candidates recorded in DATA. */
314 static inline unsigned
315 n_iv_cands (struct ivopts_data *data)
317 return VARRAY_ACTIVE_SIZE (data->iv_candidates);
320 /* Ith candidate recorded in DATA. */
322 static inline struct iv_cand *
323 iv_cand (struct ivopts_data *data, unsigned i)
325 return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
328 /* The data for LOOP. */
330 static inline struct loop_data *
331 loop_data (struct loop *loop)
333 return loop->aux;
336 /* The single loop exit if it dominates the latch, NULL otherwise. */
338 static edge
339 single_dom_exit (struct loop *loop)
341 edge exit = loop->single_exit;
343 if (!exit)
344 return NULL;
346 if (!just_once_each_iteration_p (loop, exit->src))
347 return NULL;
349 return exit;
352 /* Dumps information about the induction variable IV to FILE. */
354 extern void dump_iv (FILE *, struct iv *);
355 void
356 dump_iv (FILE *file, struct iv *iv)
358 if (iv->ssa_name)
360 fprintf (file, "ssa name ");
361 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
362 fprintf (file, "\n");
365 fprintf (file, " type ");
366 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
367 fprintf (file, "\n");
369 if (iv->step)
371 fprintf (file, " base ");
372 print_generic_expr (file, iv->base, TDF_SLIM);
373 fprintf (file, "\n");
375 fprintf (file, " step ");
376 print_generic_expr (file, iv->step, TDF_SLIM);
377 fprintf (file, "\n");
379 else
381 fprintf (file, " invariant ");
382 print_generic_expr (file, iv->base, TDF_SLIM);
383 fprintf (file, "\n");
386 if (iv->base_object)
388 fprintf (file, " base object ");
389 print_generic_expr (file, iv->base_object, TDF_SLIM);
390 fprintf (file, "\n");
393 if (iv->biv_p)
394 fprintf (file, " is a biv\n");
397 /* Dumps information about the USE to FILE. */
399 extern void dump_use (FILE *, struct iv_use *);
400 void
401 dump_use (FILE *file, struct iv_use *use)
403 fprintf (file, "use %d\n", use->id);
405 switch (use->type)
407 case USE_NONLINEAR_EXPR:
408 fprintf (file, " generic\n");
409 break;
411 case USE_OUTER:
412 fprintf (file, " outside\n");
413 break;
415 case USE_ADDRESS:
416 fprintf (file, " address\n");
417 break;
419 case USE_COMPARE:
420 fprintf (file, " compare\n");
421 break;
423 default:
424 gcc_unreachable ();
427 fprintf (file, " in statement ");
428 print_generic_expr (file, use->stmt, TDF_SLIM);
429 fprintf (file, "\n");
431 fprintf (file, " at position ");
432 if (use->op_p)
433 print_generic_expr (file, *use->op_p, TDF_SLIM);
434 fprintf (file, "\n");
436 dump_iv (file, use->iv);
438 fprintf (file, " related candidates ");
439 dump_bitmap (file, use->related_cands);
442 /* Dumps information about the uses to FILE. */
444 extern void dump_uses (FILE *, struct ivopts_data *);
445 void
446 dump_uses (FILE *file, struct ivopts_data *data)
448 unsigned i;
449 struct iv_use *use;
451 for (i = 0; i < n_iv_uses (data); i++)
453 use = iv_use (data, i);
455 dump_use (file, use);
456 fprintf (file, "\n");
460 /* Dumps information about induction variable candidate CAND to FILE. */
462 extern void dump_cand (FILE *, struct iv_cand *);
463 void
464 dump_cand (FILE *file, struct iv_cand *cand)
466 struct iv *iv = cand->iv;
468 fprintf (file, "candidate %d%s\n",
469 cand->id, cand->important ? " (important)" : "");
471 if (!iv)
473 fprintf (file, " final value replacement\n");
474 return;
477 switch (cand->pos)
479 case IP_NORMAL:
480 fprintf (file, " incremented before exit test\n");
481 break;
483 case IP_END:
484 fprintf (file, " incremented at end\n");
485 break;
487 case IP_ORIGINAL:
488 fprintf (file, " original biv\n");
489 break;
492 dump_iv (file, iv);
495 /* Returns the info for ssa version VER. */
497 static inline struct version_info *
498 ver_info (struct ivopts_data *data, unsigned ver)
500 return data->version_info + ver;
503 /* Returns the info for ssa name NAME. */
505 static inline struct version_info *
506 name_info (struct ivopts_data *data, tree name)
508 return ver_info (data, SSA_NAME_VERSION (name));
511 /* Checks whether there exists number X such that X * B = A, counting modulo
512 2^BITS. */
514 static bool
515 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
516 HOST_WIDE_INT *x)
518 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
519 unsigned HOST_WIDE_INT inv, ex, val;
520 unsigned i;
522 a &= mask;
523 b &= mask;
525 /* First divide the whole equation by 2 as long as possible. */
526 while (!(a & 1) && !(b & 1))
528 a >>= 1;
529 b >>= 1;
530 bits--;
531 mask >>= 1;
534 if (!(b & 1))
536 /* If b is still even, a is odd and there is no such x. */
537 return false;
540 /* Find the inverse of b. We compute it as
541 b^(2^(bits - 1) - 1) (mod 2^bits). */
542 inv = 1;
543 ex = b;
544 for (i = 0; i < bits - 1; i++)
546 inv = (inv * ex) & mask;
547 ex = (ex * ex) & mask;
550 val = (a * inv) & mask;
552 gcc_assert (((val * b) & mask) == a);
554 if ((val >> (bits - 1)) & 1)
555 val |= ~mask;
557 *x = val;
559 return true;
562 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
563 emitted in LOOP. */
565 static bool
566 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
568 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
570 gcc_assert (bb);
572 if (sbb == loop->latch)
573 return true;
575 if (sbb != bb)
576 return false;
578 return stmt == last_stmt (bb);
581 /* Returns true if STMT if after the place where the original induction
582 variable CAND is incremented. */
584 static bool
585 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
587 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
588 basic_block stmt_bb = bb_for_stmt (stmt);
589 block_stmt_iterator bsi;
591 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
592 return false;
594 if (stmt_bb != cand_bb)
595 return true;
597 /* Scan the block from the end, since the original ivs are usually
598 incremented at the end of the loop body. */
599 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
601 if (bsi_stmt (bsi) == cand->incremented_at)
602 return false;
603 if (bsi_stmt (bsi) == stmt)
604 return true;
608 /* Returns true if STMT if after the place where the induction variable
609 CAND is incremented in LOOP. */
611 static bool
612 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
614 switch (cand->pos)
616 case IP_END:
617 return false;
619 case IP_NORMAL:
620 return stmt_after_ip_normal_pos (loop, stmt);
622 case IP_ORIGINAL:
623 return stmt_after_ip_original_pos (cand, stmt);
625 default:
626 gcc_unreachable ();
630 /* Initializes data structures used by the iv optimization pass, stored
631 in DATA. LOOPS is the loop tree. */
633 static void
634 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
636 unsigned i;
638 data->version_info_size = 2 * num_ssa_names;
639 data->version_info = xcalloc (data->version_info_size,
640 sizeof (struct version_info));
641 data->relevant = BITMAP_XMALLOC ();
642 data->important_candidates = BITMAP_XMALLOC ();
643 data->max_inv_id = 0;
645 for (i = 1; i < loops->num; i++)
646 if (loops->parray[i])
647 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
649 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
650 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
651 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
654 /* Returns a memory object to that EXPR points. In case we are able to
655 determine that it does not point to any such object, NULL is returned. */
657 static tree
658 determine_base_object (tree expr)
660 enum tree_code code = TREE_CODE (expr);
661 tree base, obj, op0, op1;
663 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
664 return NULL_TREE;
666 switch (code)
668 case INTEGER_CST:
669 return NULL_TREE;
671 case ADDR_EXPR:
672 obj = TREE_OPERAND (expr, 0);
673 base = get_base_address (obj);
675 if (!base)
676 return fold_convert (ptr_type_node, expr);
678 if (TREE_CODE (base) == INDIRECT_REF)
679 return fold_convert (ptr_type_node, TREE_OPERAND (base, 0));
681 return fold (build1 (ADDR_EXPR, ptr_type_node, base));
683 case PLUS_EXPR:
684 case MINUS_EXPR:
685 op0 = determine_base_object (TREE_OPERAND (expr, 0));
686 op1 = determine_base_object (TREE_OPERAND (expr, 1));
688 if (!op1)
689 return op0;
691 if (!op0)
692 return (code == PLUS_EXPR
693 ? op1
694 : fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));
696 return fold (build (code, ptr_type_node, op0, op1));
698 default:
699 return fold_convert (ptr_type_node, expr);
703 /* Allocates an induction variable with given initial value BASE and step STEP
704 for loop LOOP. */
706 static struct iv *
707 alloc_iv (tree base, tree step)
709 struct iv *iv = xcalloc (1, sizeof (struct iv));
711 if (step && integer_zerop (step))
712 step = NULL_TREE;
714 iv->base = base;
715 iv->base_object = determine_base_object (base);
716 iv->step = step;
717 iv->biv_p = false;
718 iv->have_use_for = false;
719 iv->use_id = 0;
720 iv->ssa_name = NULL_TREE;
722 return iv;
725 /* Sets STEP and BASE for induction variable IV. */
727 static void
728 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
730 struct version_info *info = name_info (data, iv);
732 gcc_assert (!info->iv);
734 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
735 info->iv = alloc_iv (base, step);
736 info->iv->ssa_name = iv;
739 /* Finds induction variable declaration for VAR. */
741 static struct iv *
742 get_iv (struct ivopts_data *data, tree var)
744 basic_block bb;
746 if (!name_info (data, var)->iv)
748 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
750 if (!bb
751 || !flow_bb_inside_loop_p (data->current_loop, bb))
752 set_iv (data, var, var, NULL_TREE);
755 return name_info (data, var)->iv;
758 /* Determines the step of a biv defined in PHI. */
760 static tree
761 determine_biv_step (tree phi)
763 struct loop *loop = bb_for_stmt (phi)->loop_father;
764 tree name = PHI_RESULT (phi), base, step;
765 tree type = TREE_TYPE (name);
767 if (!is_gimple_reg (name))
768 return NULL_TREE;
770 if (!simple_iv (loop, phi, name, &base, &step))
771 return NULL_TREE;
773 if (!step)
774 return build_int_cst (type, 0);
776 return step;
779 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
781 static bool
782 abnormal_ssa_name_p (tree exp)
784 if (!exp)
785 return false;
787 if (TREE_CODE (exp) != SSA_NAME)
788 return false;
790 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
793 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
794 abnormal phi node. Callback for for_each_index. */
796 static bool
797 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
798 void *data ATTRIBUTE_UNUSED)
800 if (TREE_CODE (base) == ARRAY_REF)
802 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
803 return false;
804 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
805 return false;
808 return !abnormal_ssa_name_p (*index);
811 /* Returns true if EXPR contains a ssa name that occurs in an
812 abnormal phi node. */
814 static bool
815 contains_abnormal_ssa_name_p (tree expr)
817 enum tree_code code = TREE_CODE (expr);
818 enum tree_code_class class = TREE_CODE_CLASS (code);
820 if (code == SSA_NAME)
821 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
823 if (code == INTEGER_CST
824 || is_gimple_min_invariant (expr))
825 return false;
827 if (code == ADDR_EXPR)
828 return !for_each_index (&TREE_OPERAND (expr, 1),
829 idx_contains_abnormal_ssa_name_p,
830 NULL);
832 switch (class)
834 case tcc_binary:
835 case tcc_comparison:
836 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
837 return true;
839 /* Fallthru. */
840 case tcc_unary:
841 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
842 return true;
844 break;
846 default:
847 gcc_unreachable ();
850 return false;
853 /* Finds basic ivs. */
855 static bool
856 find_bivs (struct ivopts_data *data)
858 tree phi, step, type, base;
859 bool found = false;
860 struct loop *loop = data->current_loop;
862 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
864 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
865 continue;
867 step = determine_biv_step (phi);
869 if (!step)
870 continue;
871 if (cst_and_fits_in_hwi (step)
872 && int_cst_value (step) == 0)
873 continue;
875 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
876 if (contains_abnormal_ssa_name_p (base))
877 continue;
879 type = TREE_TYPE (PHI_RESULT (phi));
880 base = fold_convert (type, base);
881 step = fold_convert (type, step);
883 /* FIXME: We do not handle induction variables whose step does
884 not satisfy cst_and_fits_in_hwi. */
885 if (!cst_and_fits_in_hwi (step))
886 continue;
888 set_iv (data, PHI_RESULT (phi), base, step);
889 found = true;
892 return found;
895 /* Marks basic ivs. */
897 static void
898 mark_bivs (struct ivopts_data *data)
900 tree phi, var;
901 struct iv *iv, *incr_iv;
902 struct loop *loop = data->current_loop;
903 basic_block incr_bb;
905 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
907 iv = get_iv (data, PHI_RESULT (phi));
908 if (!iv)
909 continue;
911 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
912 incr_iv = get_iv (data, var);
913 if (!incr_iv)
914 continue;
916 /* If the increment is in the subloop, ignore it. */
917 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
918 if (incr_bb->loop_father != data->current_loop
919 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
920 continue;
922 iv->biv_p = true;
923 incr_iv->biv_p = true;
927 /* Checks whether STMT defines a linear induction variable and stores its
928 parameters to BASE and STEP. */
930 static bool
931 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
932 tree *base, tree *step)
934 tree lhs;
935 struct loop *loop = data->current_loop;
937 *base = NULL_TREE;
938 *step = NULL_TREE;
940 if (TREE_CODE (stmt) != MODIFY_EXPR)
941 return false;
943 lhs = TREE_OPERAND (stmt, 0);
944 if (TREE_CODE (lhs) != SSA_NAME)
945 return false;
947 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
948 return false;
950 /* FIXME: We do not handle induction variables whose step does
951 not satisfy cst_and_fits_in_hwi. */
952 if (!zero_p (*step)
953 && !cst_and_fits_in_hwi (*step))
954 return false;
956 if (contains_abnormal_ssa_name_p (*base))
957 return false;
959 return true;
962 /* Finds general ivs in statement STMT. */
964 static void
965 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
967 tree base, step;
969 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
970 return;
972 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
975 /* Finds general ivs in basic block BB. */
977 static void
978 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
980 block_stmt_iterator bsi;
982 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
983 find_givs_in_stmt (data, bsi_stmt (bsi));
986 /* Finds general ivs. */
988 static void
989 find_givs (struct ivopts_data *data)
991 struct loop *loop = data->current_loop;
992 basic_block *body = get_loop_body_in_dom_order (loop);
993 unsigned i;
995 for (i = 0; i < loop->num_nodes; i++)
996 find_givs_in_bb (data, body[i]);
997 free (body);
1000 /* Determine the number of iterations of the current loop. */
1002 static void
1003 determine_number_of_iterations (struct ivopts_data *data)
1005 struct loop *loop = data->current_loop;
1006 edge exit = single_dom_exit (loop);
1008 if (!exit)
1009 return;
1011 number_of_iterations_exit (loop, exit, &loop_data (loop)->niter);
1014 /* For each ssa name defined in LOOP determines whether it is an induction
1015 variable and if so, its initial value and step. */
1017 static bool
1018 find_induction_variables (struct ivopts_data *data)
1020 unsigned i;
1021 struct loop *loop = data->current_loop;
1022 bitmap_iterator bi;
1024 if (!find_bivs (data))
1025 return false;
1027 find_givs (data);
1028 mark_bivs (data);
1029 determine_number_of_iterations (data);
1031 if (dump_file && (dump_flags & TDF_DETAILS))
1033 if (loop_data (loop)->niter.niter)
1035 fprintf (dump_file, " number of iterations ");
1036 print_generic_expr (dump_file, loop_data (loop)->niter.niter,
1037 TDF_SLIM);
1038 fprintf (dump_file, "\n");
1040 fprintf (dump_file, " may be zero if ");
1041 print_generic_expr (dump_file, loop_data (loop)->niter.may_be_zero,
1042 TDF_SLIM);
1043 fprintf (dump_file, "\n");
1045 fprintf (dump_file, " bogus unless ");
1046 print_generic_expr (dump_file, loop_data (loop)->niter.assumptions,
1047 TDF_SLIM);
1048 fprintf (dump_file, "\n");
1049 fprintf (dump_file, "\n");
1052 fprintf (dump_file, "Induction variables:\n\n");
1054 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1056 if (ver_info (data, i)->iv)
1057 dump_iv (dump_file, ver_info (data, i)->iv);
1061 return true;
1064 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1066 static struct iv_use *
1067 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1068 tree stmt, enum use_type use_type)
1070 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1072 use->id = n_iv_uses (data);
1073 use->type = use_type;
1074 use->iv = iv;
1075 use->stmt = stmt;
1076 use->op_p = use_p;
1077 use->related_cands = BITMAP_XMALLOC ();
1079 /* To avoid showing ssa name in the dumps, if it was not reset by the
1080 caller. */
1081 iv->ssa_name = NULL_TREE;
1083 if (dump_file && (dump_flags & TDF_DETAILS))
1084 dump_use (dump_file, use);
1086 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
1088 return use;
1091 /* Checks whether OP is a loop-level invariant and if so, records it.
1092 NONLINEAR_USE is true if the invariant is used in a way we do not
1093 handle specially. */
1095 static void
1096 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1098 basic_block bb;
1099 struct version_info *info;
1101 if (TREE_CODE (op) != SSA_NAME
1102 || !is_gimple_reg (op))
1103 return;
1105 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1106 if (bb
1107 && flow_bb_inside_loop_p (data->current_loop, bb))
1108 return;
1110 info = name_info (data, op);
1111 info->name = op;
1112 info->has_nonlin_use |= nonlinear_use;
1113 if (!info->inv_id)
1114 info->inv_id = ++data->max_inv_id;
1115 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1118 /* Checks whether the use OP is interesting and if so, records it
1119 as TYPE. */
1121 static struct iv_use *
1122 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1123 enum use_type type)
1125 struct iv *iv;
1126 struct iv *civ;
1127 tree stmt;
1128 struct iv_use *use;
1130 if (TREE_CODE (op) != SSA_NAME)
1131 return NULL;
1133 iv = get_iv (data, op);
1134 if (!iv)
1135 return NULL;
1137 if (iv->have_use_for)
1139 use = iv_use (data, iv->use_id);
1141 gcc_assert (use->type == USE_NONLINEAR_EXPR
1142 || use->type == USE_OUTER);
1144 if (type == USE_NONLINEAR_EXPR)
1145 use->type = USE_NONLINEAR_EXPR;
1146 return use;
1149 if (zero_p (iv->step))
1151 record_invariant (data, op, true);
1152 return NULL;
1154 iv->have_use_for = true;
1156 civ = xmalloc (sizeof (struct iv));
1157 *civ = *iv;
1159 stmt = SSA_NAME_DEF_STMT (op);
1160 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1161 || TREE_CODE (stmt) == MODIFY_EXPR);
1163 use = record_use (data, NULL, civ, stmt, type);
1164 iv->use_id = use->id;
1166 return use;
1169 /* Checks whether the use OP is interesting and if so, records it. */
1171 static struct iv_use *
1172 find_interesting_uses_op (struct ivopts_data *data, tree op)
1174 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1177 /* Records a definition of induction variable OP that is used outside of the
1178 loop. */
1180 static struct iv_use *
1181 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1183 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1186 /* Checks whether the condition *COND_P in STMT is interesting
1187 and if so, records it. */
1189 static void
1190 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1192 tree *op0_p;
1193 tree *op1_p;
1194 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1195 struct iv const_iv;
1196 tree zero = integer_zero_node;
1198 const_iv.step = NULL_TREE;
1200 if (integer_zerop (*cond_p)
1201 || integer_nonzerop (*cond_p))
1202 return;
1204 if (TREE_CODE (*cond_p) == SSA_NAME)
1206 op0_p = cond_p;
1207 op1_p = &zero;
1209 else
1211 op0_p = &TREE_OPERAND (*cond_p, 0);
1212 op1_p = &TREE_OPERAND (*cond_p, 1);
1215 if (TREE_CODE (*op0_p) == SSA_NAME)
1216 iv0 = get_iv (data, *op0_p);
1217 else
1218 iv0 = &const_iv;
1220 if (TREE_CODE (*op1_p) == SSA_NAME)
1221 iv1 = get_iv (data, *op1_p);
1222 else
1223 iv1 = &const_iv;
1225 if (/* When comparing with non-invariant value, we may not do any senseful
1226 induction variable elimination. */
1227 (!iv0 || !iv1)
1228 /* Eliminating condition based on two ivs would be nontrivial.
1229 ??? TODO -- it is not really important to handle this case. */
1230 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1232 find_interesting_uses_op (data, *op0_p);
1233 find_interesting_uses_op (data, *op1_p);
1234 return;
1237 if (zero_p (iv0->step) && zero_p (iv1->step))
1239 /* If both are invariants, this is a work for unswitching. */
1240 return;
1243 civ = xmalloc (sizeof (struct iv));
1244 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1245 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1248 /* Returns true if expression EXPR is obviously invariant in LOOP,
1249 i.e. if all its operands are defined outside of the LOOP. */
1251 bool
1252 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1254 basic_block def_bb;
1255 unsigned i, len;
1257 if (is_gimple_min_invariant (expr))
1258 return true;
1260 if (TREE_CODE (expr) == SSA_NAME)
1262 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1263 if (def_bb
1264 && flow_bb_inside_loop_p (loop, def_bb))
1265 return false;
1267 return true;
1270 if (!EXPR_P (expr))
1271 return false;
1273 len = first_rtl_op (TREE_CODE (expr));
1274 for (i = 0; i < len; i++)
1275 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1276 return false;
1278 return true;
1281 /* Cumulates the steps of indices into DATA and replaces their values with the
1282 initial ones. Returns false when the value of the index cannot be determined.
1283 Callback for for_each_index. */
1285 struct ifs_ivopts_data
1287 struct ivopts_data *ivopts_data;
1288 tree stmt;
1289 tree *step_p;
1292 static bool
1293 idx_find_step (tree base, tree *idx, void *data)
1295 struct ifs_ivopts_data *dta = data;
1296 struct iv *iv;
1297 tree step, type, iv_type, iv_step, lbound, off;
1298 struct loop *loop = dta->ivopts_data->current_loop;
1300 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1301 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1302 return false;
1304 /* If base is a component ref, require that the offset of the reference
1305 be invariant. */
1306 if (TREE_CODE (base) == COMPONENT_REF)
1308 off = component_ref_field_offset (base);
1309 return expr_invariant_in_loop_p (loop, off);
1312 /* If base is array, first check whether we will be able to move the
1313 reference out of the loop (in order to take its address in strength
1314 reduction). In order for this to work we need both lower bound
1315 and step to be loop invariants. */
1316 if (TREE_CODE (base) == ARRAY_REF)
1318 step = array_ref_element_size (base);
1319 lbound = array_ref_low_bound (base);
1321 if (!expr_invariant_in_loop_p (loop, step)
1322 || !expr_invariant_in_loop_p (loop, lbound))
1323 return false;
1326 if (TREE_CODE (*idx) != SSA_NAME)
1327 return true;
1329 iv = get_iv (dta->ivopts_data, *idx);
1330 if (!iv)
1331 return false;
1333 *idx = iv->base;
1335 if (!iv->step)
1336 return true;
1338 iv_type = TREE_TYPE (iv->base);
1339 type = build_pointer_type (TREE_TYPE (base));
1340 if (TREE_CODE (base) == ARRAY_REF)
1342 step = array_ref_element_size (base);
1344 /* We only handle addresses whose step is an integer constant. */
1345 if (TREE_CODE (step) != INTEGER_CST)
1346 return false;
1348 else
1349 /* The step for pointer arithmetics already is 1 byte. */
1350 step = build_int_cst (type, 1);
1352 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1353 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1354 type, iv->base, iv->step, dta->stmt);
1355 else
1356 iv_step = fold_convert (iv_type, iv->step);
1358 if (!iv_step)
1360 /* The index might wrap. */
1361 return false;
1364 step = fold_binary_to_constant (MULT_EXPR, type, step, iv_step);
1366 if (!*dta->step_p)
1367 *dta->step_p = step;
1368 else
1369 *dta->step_p = fold_binary_to_constant (PLUS_EXPR, type,
1370 *dta->step_p, step);
1372 return true;
1375 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1376 object is passed to it in DATA. */
1378 static bool
1379 idx_record_use (tree base, tree *idx,
1380 void *data)
1382 find_interesting_uses_op (data, *idx);
1383 if (TREE_CODE (base) == ARRAY_REF)
1385 find_interesting_uses_op (data, array_ref_element_size (base));
1386 find_interesting_uses_op (data, array_ref_low_bound (base));
1388 return true;
1391 /* Finds addresses in *OP_P inside STMT. */
1393 static void
1394 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1396 tree base = unshare_expr (*op_p), step = NULL;
1397 struct iv *civ;
1398 struct ifs_ivopts_data ifs_ivopts_data;
1400 /* Ignore bitfields for now. Not really something terribly complicated
1401 to handle. TODO. */
1402 if (TREE_CODE (base) == COMPONENT_REF
1403 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1404 goto fail;
1406 ifs_ivopts_data.ivopts_data = data;
1407 ifs_ivopts_data.stmt = stmt;
1408 ifs_ivopts_data.step_p = &step;
1409 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1410 || zero_p (step))
1411 goto fail;
1413 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1414 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1416 if (TREE_CODE (base) == INDIRECT_REF)
1417 base = TREE_OPERAND (base, 0);
1418 else
1419 base = build_addr (base);
1421 civ = alloc_iv (base, step);
1422 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1423 return;
1425 fail:
1426 for_each_index (op_p, idx_record_use, data);
1429 /* Finds and records invariants used in STMT. */
1431 static void
1432 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1434 use_optype uses = NULL;
1435 unsigned i, n;
1436 tree op;
1438 if (TREE_CODE (stmt) == PHI_NODE)
1439 n = PHI_NUM_ARGS (stmt);
1440 else
1442 get_stmt_operands (stmt);
1443 uses = STMT_USE_OPS (stmt);
1444 n = NUM_USES (uses);
1447 for (i = 0; i < n; i++)
1449 if (TREE_CODE (stmt) == PHI_NODE)
1450 op = PHI_ARG_DEF (stmt, i);
1451 else
1452 op = USE_OP (uses, i);
1454 record_invariant (data, op, false);
1458 /* Finds interesting uses of induction variables in the statement STMT. */
1460 static void
1461 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1463 struct iv *iv;
1464 tree op, lhs, rhs;
1465 use_optype uses = NULL;
1466 unsigned i, n;
1468 find_invariants_stmt (data, stmt);
1470 if (TREE_CODE (stmt) == COND_EXPR)
1472 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1473 return;
1476 if (TREE_CODE (stmt) == MODIFY_EXPR)
1478 lhs = TREE_OPERAND (stmt, 0);
1479 rhs = TREE_OPERAND (stmt, 1);
1481 if (TREE_CODE (lhs) == SSA_NAME)
1483 /* If the statement defines an induction variable, the uses are not
1484 interesting by themselves. */
1486 iv = get_iv (data, lhs);
1488 if (iv && !zero_p (iv->step))
1489 return;
1492 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1494 case tcc_comparison:
1495 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1496 return;
1498 case tcc_reference:
1499 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1500 if (REFERENCE_CLASS_P (lhs))
1501 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1502 return;
1504 default: ;
1507 if (REFERENCE_CLASS_P (lhs)
1508 && is_gimple_val (rhs))
1510 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1511 find_interesting_uses_op (data, rhs);
1512 return;
1515 /* TODO -- we should also handle address uses of type
1517 memory = call (whatever);
1521 call (memory). */
1524 if (TREE_CODE (stmt) == PHI_NODE
1525 && bb_for_stmt (stmt) == data->current_loop->header)
1527 lhs = PHI_RESULT (stmt);
1528 iv = get_iv (data, lhs);
1530 if (iv && !zero_p (iv->step))
1531 return;
1534 if (TREE_CODE (stmt) == PHI_NODE)
1535 n = PHI_NUM_ARGS (stmt);
1536 else
1538 uses = STMT_USE_OPS (stmt);
1539 n = NUM_USES (uses);
1542 for (i = 0; i < n; i++)
1544 if (TREE_CODE (stmt) == PHI_NODE)
1545 op = PHI_ARG_DEF (stmt, i);
1546 else
1547 op = USE_OP (uses, i);
1549 if (TREE_CODE (op) != SSA_NAME)
1550 continue;
1552 iv = get_iv (data, op);
1553 if (!iv)
1554 continue;
1556 find_interesting_uses_op (data, op);
1560 /* Finds interesting uses of induction variables outside of loops
1561 on loop exit edge EXIT. */
1563 static void
1564 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1566 tree phi, def;
1568 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1570 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1571 find_interesting_uses_outer (data, def);
1575 /* Finds uses of the induction variables that are interesting. */
1577 static void
1578 find_interesting_uses (struct ivopts_data *data)
1580 basic_block bb;
1581 block_stmt_iterator bsi;
1582 tree phi;
1583 basic_block *body = get_loop_body (data->current_loop);
1584 unsigned i;
1585 struct version_info *info;
1586 edge e;
1588 if (dump_file && (dump_flags & TDF_DETAILS))
1589 fprintf (dump_file, "Uses:\n\n");
1591 for (i = 0; i < data->current_loop->num_nodes; i++)
1593 edge_iterator ei;
1594 bb = body[i];
1596 FOR_EACH_EDGE (e, ei, bb->succs)
1597 if (e->dest != EXIT_BLOCK_PTR
1598 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1599 find_interesting_uses_outside (data, e);
1601 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1602 find_interesting_uses_stmt (data, phi);
1603 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1604 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1607 if (dump_file && (dump_flags & TDF_DETAILS))
1609 bitmap_iterator bi;
1611 fprintf (dump_file, "\n");
1613 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1615 info = ver_info (data, i);
1616 if (info->inv_id)
1618 fprintf (dump_file, " ");
1619 print_generic_expr (dump_file, info->name, TDF_SLIM);
1620 fprintf (dump_file, " is invariant (%d)%s\n",
1621 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1625 fprintf (dump_file, "\n");
1628 free (body);
1631 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1632 position to POS. If USE is not NULL, the candidate is set as related to
1633 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1634 replacement of the final value of the iv by a direct computation. */
1636 static struct iv_cand *
1637 add_candidate_1 (struct ivopts_data *data,
1638 tree base, tree step, bool important, enum iv_position pos,
1639 struct iv_use *use, tree incremented_at)
1641 unsigned i;
1642 struct iv_cand *cand = NULL;
1643 tree type;
1645 if (base)
1647 type = TREE_TYPE (base);
1648 if (!TYPE_UNSIGNED (type))
1650 type = unsigned_type_for (type);
1651 base = fold_convert (type, base);
1652 if (step)
1653 step = fold_convert (type, step);
1657 for (i = 0; i < n_iv_cands (data); i++)
1659 cand = iv_cand (data, i);
1661 if (cand->pos != pos)
1662 continue;
1664 if (cand->incremented_at != incremented_at)
1665 continue;
1667 if (!cand->iv)
1669 if (!base && !step)
1670 break;
1672 continue;
1675 if (!base && !step)
1676 continue;
1678 if (!operand_equal_p (base, cand->iv->base, 0))
1679 continue;
1681 if (zero_p (cand->iv->step))
1683 if (zero_p (step))
1684 break;
1686 else
1688 if (step && operand_equal_p (step, cand->iv->step, 0))
1689 break;
1693 if (i == n_iv_cands (data))
1695 cand = xcalloc (1, sizeof (struct iv_cand));
1696 cand->id = i;
1698 if (!base && !step)
1699 cand->iv = NULL;
1700 else
1701 cand->iv = alloc_iv (base, step);
1703 cand->pos = pos;
1704 if (pos != IP_ORIGINAL && cand->iv)
1706 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1707 cand->var_after = cand->var_before;
1709 cand->important = important;
1710 cand->incremented_at = incremented_at;
1711 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1713 if (dump_file && (dump_flags & TDF_DETAILS))
1714 dump_cand (dump_file, cand);
1717 if (important && !cand->important)
1719 cand->important = true;
1720 if (dump_file && (dump_flags & TDF_DETAILS))
1721 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1724 if (use)
1726 bitmap_set_bit (use->related_cands, i);
1727 if (dump_file && (dump_flags & TDF_DETAILS))
1728 fprintf (dump_file, "Candidate %d is related to use %d\n",
1729 cand->id, use->id);
1732 return cand;
1735 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1736 position to POS. If USE is not NULL, the candidate is set as related to
1737 it. The candidate computation is scheduled on all available positions. */
1739 static void
1740 add_candidate (struct ivopts_data *data,
1741 tree base, tree step, bool important, struct iv_use *use)
1743 if (ip_normal_pos (data->current_loop))
1744 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
1745 if (ip_end_pos (data->current_loop))
1746 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
1749 /* Adds standard iv candidates. */
1751 static void
1752 add_standard_iv_candidates (struct ivopts_data *data)
1754 /* Add 0 + 1 * iteration candidate. */
1755 add_candidate (data,
1756 build_int_cst (unsigned_intSI_type_node, 0),
1757 build_int_cst (unsigned_intSI_type_node, 1),
1758 true, NULL);
1760 /* The same for a long type if it is still fast enough. */
1761 if (BITS_PER_WORD > 32)
1762 add_candidate (data,
1763 build_int_cst (unsigned_intDI_type_node, 0),
1764 build_int_cst (unsigned_intDI_type_node, 1),
1765 true, NULL);
1769 /* Adds candidates bases on the old induction variable IV. */
1771 static void
1772 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
1774 tree phi, def;
1775 struct iv_cand *cand;
1777 add_candidate (data, iv->base, iv->step, true, NULL);
1779 /* The same, but with initial value zero. */
1780 add_candidate (data,
1781 build_int_cst (TREE_TYPE (iv->base), 0),
1782 iv->step, true, NULL);
1784 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
1785 if (TREE_CODE (phi) == PHI_NODE)
1787 /* Additionally record the possibility of leaving the original iv
1788 untouched. */
1789 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
1790 cand = add_candidate_1 (data,
1791 iv->base, iv->step, true, IP_ORIGINAL, NULL,
1792 SSA_NAME_DEF_STMT (def));
1793 cand->var_before = iv->ssa_name;
1794 cand->var_after = def;
1798 /* Adds candidates based on the old induction variables. */
1800 static void
1801 add_old_ivs_candidates (struct ivopts_data *data)
1803 unsigned i;
1804 struct iv *iv;
1805 bitmap_iterator bi;
1807 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1809 iv = ver_info (data, i)->iv;
1810 if (iv && iv->biv_p && !zero_p (iv->step))
1811 add_old_iv_candidates (data, iv);
1815 /* Adds candidates based on the value of the induction variable IV and USE. */
1817 static void
1818 add_iv_value_candidates (struct ivopts_data *data,
1819 struct iv *iv, struct iv_use *use)
1821 add_candidate (data, iv->base, iv->step, false, use);
1823 /* The same, but with initial value zero. */
1824 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
1825 iv->step, false, use);
1828 /* Adds candidates based on the address IV and USE. */
1830 static void
1831 add_address_candidates (struct ivopts_data *data,
1832 struct iv *iv, struct iv_use *use)
1834 tree base, abase, tmp, *act;
1836 /* First, the trivial choices. */
1837 add_iv_value_candidates (data, iv, use);
1839 /* Second, try removing the COMPONENT_REFs. */
1840 if (TREE_CODE (iv->base) == ADDR_EXPR)
1842 base = TREE_OPERAND (iv->base, 0);
1843 while (TREE_CODE (base) == COMPONENT_REF
1844 || (TREE_CODE (base) == ARRAY_REF
1845 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
1846 base = TREE_OPERAND (base, 0);
1848 if (base != TREE_OPERAND (iv->base, 0))
1850 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1851 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1853 if (TREE_CODE (base) == INDIRECT_REF)
1854 base = TREE_OPERAND (base, 0);
1855 else
1856 base = build_addr (base);
1857 add_candidate (data, base, iv->step, false, use);
1861 /* Third, try removing the constant offset. */
1862 abase = iv->base;
1863 while (TREE_CODE (abase) == PLUS_EXPR
1864 && TREE_CODE (TREE_OPERAND (abase, 1)) != INTEGER_CST)
1865 abase = TREE_OPERAND (abase, 0);
1866 /* We found the offset, so make the copy of the non-shared part and
1867 remove it. */
1868 if (TREE_CODE (abase) == PLUS_EXPR)
1870 tmp = iv->base;
1871 act = &base;
1873 for (tmp = iv->base; tmp != abase; tmp = TREE_OPERAND (tmp, 0))
1875 *act = build2 (PLUS_EXPR, TREE_TYPE (tmp),
1876 NULL_TREE, TREE_OPERAND (tmp, 1));
1877 act = &TREE_OPERAND (*act, 0);
1879 *act = TREE_OPERAND (tmp, 0);
1881 add_candidate (data, base, iv->step, false, use);
1885 /* Possibly adds pseudocandidate for replacing the final value of USE by
1886 a direct computation. */
1888 static void
1889 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
1891 struct tree_niter_desc *niter;
1892 struct loop *loop = data->current_loop;
1894 /* We must know where we exit the loop and how many times does it roll. */
1895 if (!single_dom_exit (loop))
1896 return;
1898 niter = &loop_data (loop)->niter;
1899 if (!niter->niter
1900 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
1901 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
1902 return;
1904 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
1907 /* Adds candidates based on the uses. */
1909 static void
1910 add_derived_ivs_candidates (struct ivopts_data *data)
1912 unsigned i;
1914 for (i = 0; i < n_iv_uses (data); i++)
1916 struct iv_use *use = iv_use (data, i);
1918 if (!use)
1919 continue;
1921 switch (use->type)
1923 case USE_NONLINEAR_EXPR:
1924 case USE_COMPARE:
1925 /* Just add the ivs based on the value of the iv used here. */
1926 add_iv_value_candidates (data, use->iv, use);
1927 break;
1929 case USE_OUTER:
1930 add_iv_value_candidates (data, use->iv, use);
1932 /* Additionally, add the pseudocandidate for the possibility to
1933 replace the final value by a direct computation. */
1934 add_iv_outer_candidates (data, use);
1935 break;
1937 case USE_ADDRESS:
1938 add_address_candidates (data, use->iv, use);
1939 break;
1941 default:
1942 gcc_unreachable ();
1947 /* Record important candidates and add them to related_cands bitmaps
1948 if needed. */
1950 static void
1951 record_important_candidates (struct ivopts_data *data)
1953 unsigned i;
1954 struct iv_use *use;
1956 for (i = 0; i < n_iv_cands (data); i++)
1958 struct iv_cand *cand = iv_cand (data, i);
1960 if (cand->important)
1961 bitmap_set_bit (data->important_candidates, i);
1964 data->consider_all_candidates = (n_iv_cands (data)
1965 <= CONSIDER_ALL_CANDIDATES_BOUND);
1967 if (data->consider_all_candidates)
1969 /* We will not need "related_cands" bitmaps in this case,
1970 so release them to decrease peak memory consumption. */
1971 for (i = 0; i < n_iv_uses (data); i++)
1973 use = iv_use (data, i);
1974 BITMAP_XFREE (use->related_cands);
1977 else
1979 /* Add important candidates to the related_cands bitmaps. */
1980 for (i = 0; i < n_iv_uses (data); i++)
1981 bitmap_ior_into (iv_use (data, i)->related_cands,
1982 data->important_candidates);
1986 /* Finds the candidates for the induction variables. */
1988 static void
1989 find_iv_candidates (struct ivopts_data *data)
1991 /* Add commonly used ivs. */
1992 add_standard_iv_candidates (data);
1994 /* Add old induction variables. */
1995 add_old_ivs_candidates (data);
1997 /* Add induction variables derived from uses. */
1998 add_derived_ivs_candidates (data);
2000 /* Record the important candidates. */
2001 record_important_candidates (data);
2004 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2005 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2006 we allocate a simple list to every use. */
2008 static void
2009 alloc_use_cost_map (struct ivopts_data *data)
2011 unsigned i, size, s, j;
2013 for (i = 0; i < n_iv_uses (data); i++)
2015 struct iv_use *use = iv_use (data, i);
2016 bitmap_iterator bi;
2018 if (data->consider_all_candidates)
2019 size = n_iv_cands (data);
2020 else
2022 s = 0;
2023 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2025 s++;
2028 /* Round up to the power of two, so that moduling by it is fast. */
2029 for (size = 1; size < s; size <<= 1)
2030 continue;
2033 use->n_map_members = size;
2034 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2038 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2039 on invariants DEPENDS_ON. */
2041 static void
2042 set_use_iv_cost (struct ivopts_data *data,
2043 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2044 bitmap depends_on)
2046 unsigned i, s;
2048 if (cost == INFTY)
2050 BITMAP_XFREE (depends_on);
2051 return;
2054 if (data->consider_all_candidates)
2056 use->cost_map[cand->id].cand = cand;
2057 use->cost_map[cand->id].cost = cost;
2058 use->cost_map[cand->id].depends_on = depends_on;
2059 return;
2062 /* n_map_members is a power of two, so this computes modulo. */
2063 s = cand->id & (use->n_map_members - 1);
2064 for (i = s; i < use->n_map_members; i++)
2065 if (!use->cost_map[i].cand)
2066 goto found;
2067 for (i = 0; i < s; i++)
2068 if (!use->cost_map[i].cand)
2069 goto found;
2071 gcc_unreachable ();
2073 found:
2074 use->cost_map[i].cand = cand;
2075 use->cost_map[i].cost = cost;
2076 use->cost_map[i].depends_on = depends_on;
2079 /* Gets cost of (USE, CANDIDATE) pair. */
2081 static struct cost_pair *
2082 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2083 struct iv_cand *cand)
2085 unsigned i, s;
2086 struct cost_pair *ret;
2088 if (!cand)
2089 return NULL;
2091 if (data->consider_all_candidates)
2093 ret = use->cost_map + cand->id;
2094 if (!ret->cand)
2095 return NULL;
2097 return ret;
2100 /* n_map_members is a power of two, so this computes modulo. */
2101 s = cand->id & (use->n_map_members - 1);
2102 for (i = s; i < use->n_map_members; i++)
2103 if (use->cost_map[i].cand == cand)
2104 return use->cost_map + i;
2106 for (i = 0; i < s; i++)
2107 if (use->cost_map[i].cand == cand)
2108 return use->cost_map + i;
2110 return NULL;
2113 /* Returns estimate on cost of computing SEQ. */
2115 static unsigned
2116 seq_cost (rtx seq)
2118 unsigned cost = 0;
2119 rtx set;
2121 for (; seq; seq = NEXT_INSN (seq))
2123 set = single_set (seq);
2124 if (set)
2125 cost += rtx_cost (set, SET);
2126 else
2127 cost++;
2130 return cost;
2133 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2134 static rtx
2135 produce_memory_decl_rtl (tree obj, int *regno)
2137 rtx x;
2138 if (!obj)
2139 abort ();
2140 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2142 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2143 x = gen_rtx_SYMBOL_REF (Pmode, name);
2145 else
2146 x = gen_raw_REG (Pmode, (*regno)++);
2148 return gen_rtx_MEM (DECL_MODE (obj), x);
2151 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2152 walk_tree. DATA contains the actual fake register number. */
2154 static tree
2155 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2157 tree obj = NULL_TREE;
2158 rtx x = NULL_RTX;
2159 int *regno = data;
2161 switch (TREE_CODE (*expr_p))
2163 case ADDR_EXPR:
2164 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2165 handled_component_p (*expr_p);
2166 expr_p = &TREE_OPERAND (*expr_p, 0))
2167 continue;
2168 obj = *expr_p;
2169 if (DECL_P (obj))
2170 x = produce_memory_decl_rtl (obj, regno);
2171 break;
2173 case SSA_NAME:
2174 *ws = 0;
2175 obj = SSA_NAME_VAR (*expr_p);
2176 if (!DECL_RTL_SET_P (obj))
2177 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2178 break;
2180 case VAR_DECL:
2181 case PARM_DECL:
2182 case RESULT_DECL:
2183 *ws = 0;
2184 obj = *expr_p;
2186 if (DECL_RTL_SET_P (obj))
2187 break;
2189 if (DECL_MODE (obj) == BLKmode)
2190 x = produce_memory_decl_rtl (obj, regno);
2191 else
2192 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2194 break;
2196 default:
2197 break;
2200 if (x)
2202 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
2203 SET_DECL_RTL (obj, x);
2206 return NULL_TREE;
2209 /* Determines cost of the computation of EXPR. */
2211 static unsigned
2212 computation_cost (tree expr)
2214 rtx seq, rslt;
2215 tree type = TREE_TYPE (expr);
2216 unsigned cost;
2217 int regno = 0;
2219 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2220 start_sequence ();
2221 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2222 seq = get_insns ();
2223 end_sequence ();
2225 cost = seq_cost (seq);
2226 if (GET_CODE (rslt) == MEM)
2227 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2229 return cost;
2232 /* Returns variable containing the value of candidate CAND at statement AT. */
2234 static tree
2235 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2237 if (stmt_after_increment (loop, cand, stmt))
2238 return cand->var_after;
2239 else
2240 return cand->var_before;
2243 /* Determines the expression by that USE is expressed from induction variable
2244 CAND at statement AT in LOOP. */
2246 static tree
2247 get_computation_at (struct loop *loop,
2248 struct iv_use *use, struct iv_cand *cand, tree at)
2250 tree ubase = use->iv->base;
2251 tree ustep = use->iv->step;
2252 tree cbase = cand->iv->base;
2253 tree cstep = cand->iv->step;
2254 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2255 tree uutype;
2256 tree expr, delta;
2257 tree ratio;
2258 unsigned HOST_WIDE_INT ustepi, cstepi;
2259 HOST_WIDE_INT ratioi;
2261 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2263 /* We do not have a precision to express the values of use. */
2264 return NULL_TREE;
2267 expr = var_at_stmt (loop, cand, at);
2269 if (TREE_TYPE (expr) != ctype)
2271 /* This may happen with the original ivs. */
2272 expr = fold_convert (ctype, expr);
2275 if (TYPE_UNSIGNED (utype))
2276 uutype = utype;
2277 else
2279 uutype = unsigned_type_for (utype);
2280 ubase = fold_convert (uutype, ubase);
2281 ustep = fold_convert (uutype, ustep);
2284 if (uutype != ctype)
2286 expr = fold_convert (uutype, expr);
2287 cbase = fold_convert (uutype, cbase);
2288 cstep = fold_convert (uutype, cstep);
2291 if (!cst_and_fits_in_hwi (cstep)
2292 || !cst_and_fits_in_hwi (ustep))
2293 return NULL_TREE;
2295 ustepi = int_cst_value (ustep);
2296 cstepi = int_cst_value (cstep);
2298 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2300 /* TODO maybe consider case when ustep divides cstep and the ratio is
2301 a power of 2 (so that the division is fast to execute)? We would
2302 need to be much more careful with overflows etc. then. */
2303 return NULL_TREE;
2306 /* We may need to shift the value if we are after the increment. */
2307 if (stmt_after_increment (loop, cand, at))
2308 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2310 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2311 or |ratio| == 1, it is better to handle this like
2313 ubase - ratio * cbase + ratio * var. */
2315 if (ratioi == 1)
2317 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2318 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2320 else if (ratioi == -1)
2322 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2323 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2325 else if (TREE_CODE (cbase) == INTEGER_CST)
2327 ratio = build_int_cst_type (uutype, ratioi);
2328 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2329 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2330 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2331 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2333 else
2335 expr = fold (build2 (MINUS_EXPR, uutype, expr, cbase));
2336 ratio = build_int_cst_type (uutype, ratioi);
2337 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2338 expr = fold (build2 (PLUS_EXPR, uutype, ubase, expr));
2341 return fold_convert (utype, expr);
2344 /* Determines the expression by that USE is expressed from induction variable
2345 CAND in LOOP. */
2347 static tree
2348 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2350 return get_computation_at (loop, use, cand, use->stmt);
2353 /* Strips constant offsets from EXPR and adds them to OFFSET. */
2355 static void
2356 strip_offset (tree *expr, unsigned HOST_WIDE_INT *offset)
2358 tree op0, op1;
2359 enum tree_code code;
2361 while (1)
2363 if (cst_and_fits_in_hwi (*expr))
2365 *offset += int_cst_value (*expr);
2366 *expr = integer_zero_node;
2367 return;
2370 code = TREE_CODE (*expr);
2372 if (code != PLUS_EXPR && code != MINUS_EXPR)
2373 return;
2375 op0 = TREE_OPERAND (*expr, 0);
2376 op1 = TREE_OPERAND (*expr, 1);
2378 if (cst_and_fits_in_hwi (op1))
2380 if (code == PLUS_EXPR)
2381 *offset += int_cst_value (op1);
2382 else
2383 *offset -= int_cst_value (op1);
2385 *expr = op0;
2386 continue;
2389 if (code != PLUS_EXPR)
2390 return;
2392 if (!cst_and_fits_in_hwi (op0))
2393 return;
2395 *offset += int_cst_value (op0);
2396 *expr = op1;
2400 /* Returns cost of addition in MODE. */
2402 static unsigned
2403 add_cost (enum machine_mode mode)
2405 static unsigned costs[NUM_MACHINE_MODES];
2406 rtx seq;
2407 unsigned cost;
2409 if (costs[mode])
2410 return costs[mode];
2412 start_sequence ();
2413 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2414 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2415 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2416 NULL_RTX);
2417 seq = get_insns ();
2418 end_sequence ();
2420 cost = seq_cost (seq);
2421 if (!cost)
2422 cost = 1;
2424 costs[mode] = cost;
2426 if (dump_file && (dump_flags & TDF_DETAILS))
2427 fprintf (dump_file, "Addition in %s costs %d\n",
2428 GET_MODE_NAME (mode), cost);
2429 return cost;
2432 /* Entry in a hashtable of already known costs for multiplication. */
2433 struct mbc_entry
2435 HOST_WIDE_INT cst; /* The constant to multiply by. */
2436 enum machine_mode mode; /* In mode. */
2437 unsigned cost; /* The cost. */
2440 /* Counts hash value for the ENTRY. */
2442 static hashval_t
2443 mbc_entry_hash (const void *entry)
2445 const struct mbc_entry *e = entry;
2447 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2450 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2452 static int
2453 mbc_entry_eq (const void *entry1, const void *entry2)
2455 const struct mbc_entry *e1 = entry1;
2456 const struct mbc_entry *e2 = entry2;
2458 return (e1->mode == e2->mode
2459 && e1->cst == e2->cst);
2462 /* Returns cost of multiplication by constant CST in MODE. */
2464 static unsigned
2465 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2467 static htab_t costs;
2468 struct mbc_entry **cached, act;
2469 rtx seq;
2470 unsigned cost;
2472 if (!costs)
2473 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2475 act.mode = mode;
2476 act.cst = cst;
2477 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2478 if (*cached)
2479 return (*cached)->cost;
2481 *cached = xmalloc (sizeof (struct mbc_entry));
2482 (*cached)->mode = mode;
2483 (*cached)->cst = cst;
2485 start_sequence ();
2486 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2487 NULL_RTX, 0);
2488 seq = get_insns ();
2489 end_sequence ();
2491 cost = seq_cost (seq);
2493 if (dump_file && (dump_flags & TDF_DETAILS))
2494 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2495 (int) cst, GET_MODE_NAME (mode), cost);
2497 (*cached)->cost = cost;
2499 return cost;
2502 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2503 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2504 variable is omitted. The created memory accesses MODE.
2506 TODO -- there must be some better way. This all is quite crude. */
2508 static unsigned
2509 get_address_cost (bool symbol_present, bool var_present,
2510 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2512 #define MAX_RATIO 128
2513 static sbitmap valid_mult;
2514 static HOST_WIDE_INT rat, off;
2515 static HOST_WIDE_INT min_offset, max_offset;
2516 static unsigned costs[2][2][2][2];
2517 unsigned cost, acost;
2518 rtx seq, addr, base;
2519 bool offset_p, ratio_p;
2520 rtx reg1;
2521 HOST_WIDE_INT s_offset;
2522 unsigned HOST_WIDE_INT mask;
2523 unsigned bits;
2525 if (!valid_mult)
2527 HOST_WIDE_INT i;
2529 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2531 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2532 for (i = 1; i <= 1 << 20; i <<= 1)
2534 XEXP (addr, 1) = GEN_INT (i);
2535 if (!memory_address_p (Pmode, addr))
2536 break;
2538 max_offset = i >> 1;
2539 off = max_offset;
2541 for (i = 1; i <= 1 << 20; i <<= 1)
2543 XEXP (addr, 1) = GEN_INT (-i);
2544 if (!memory_address_p (Pmode, addr))
2545 break;
2547 min_offset = -(i >> 1);
2549 if (dump_file && (dump_flags & TDF_DETAILS))
2551 fprintf (dump_file, "get_address_cost:\n");
2552 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2553 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2556 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2557 sbitmap_zero (valid_mult);
2558 rat = 1;
2559 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2560 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2562 XEXP (addr, 1) = GEN_INT (i);
2563 if (memory_address_p (Pmode, addr))
2565 SET_BIT (valid_mult, i + MAX_RATIO);
2566 rat = i;
2570 if (dump_file && (dump_flags & TDF_DETAILS))
2572 fprintf (dump_file, " allowed multipliers:");
2573 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2574 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2575 fprintf (dump_file, " %d", (int) i);
2576 fprintf (dump_file, "\n");
2577 fprintf (dump_file, "\n");
2581 bits = GET_MODE_BITSIZE (Pmode);
2582 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2583 offset &= mask;
2584 if ((offset >> (bits - 1) & 1))
2585 offset |= ~mask;
2586 s_offset = offset;
2588 cost = 0;
2589 offset_p = (s_offset != 0
2590 && min_offset <= s_offset && s_offset <= max_offset);
2591 ratio_p = (ratio != 1
2592 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2593 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2595 if (ratio != 1 && !ratio_p)
2596 cost += multiply_by_cost (ratio, Pmode);
2598 if (s_offset && !offset_p && !symbol_present)
2600 cost += add_cost (Pmode);
2601 var_present = true;
2604 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2605 if (!acost)
2607 acost = 0;
2609 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2610 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2611 if (ratio_p)
2612 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2614 if (var_present)
2615 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2617 if (symbol_present)
2619 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2620 if (offset_p)
2621 base = gen_rtx_fmt_e (CONST, Pmode,
2622 gen_rtx_fmt_ee (PLUS, Pmode,
2623 base,
2624 GEN_INT (off)));
2626 else if (offset_p)
2627 base = GEN_INT (off);
2628 else
2629 base = NULL_RTX;
2631 if (base)
2632 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2634 start_sequence ();
2635 addr = memory_address (Pmode, addr);
2636 seq = get_insns ();
2637 end_sequence ();
2639 acost = seq_cost (seq);
2640 acost += address_cost (addr, Pmode);
2642 if (!acost)
2643 acost = 1;
2644 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2647 return cost + acost;
2650 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2651 the bitmap to that we should store it. */
2653 static struct ivopts_data *fd_ivopts_data;
2654 static tree
2655 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2657 bitmap *depends_on = data;
2658 struct version_info *info;
2660 if (TREE_CODE (*expr_p) != SSA_NAME)
2661 return NULL_TREE;
2662 info = name_info (fd_ivopts_data, *expr_p);
2664 if (!info->inv_id || info->has_nonlin_use)
2665 return NULL_TREE;
2667 if (!*depends_on)
2668 *depends_on = BITMAP_XMALLOC ();
2669 bitmap_set_bit (*depends_on, info->inv_id);
2671 return NULL_TREE;
2674 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
2675 invariants the computation depends on. */
2677 static unsigned
2678 force_var_cost (struct ivopts_data *data,
2679 tree expr, bitmap *depends_on)
2681 static bool costs_initialized = false;
2682 static unsigned integer_cost;
2683 static unsigned symbol_cost;
2684 static unsigned address_cost;
2685 tree op0, op1;
2686 unsigned cost0, cost1, cost;
2687 enum machine_mode mode;
2689 if (!costs_initialized)
2691 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2692 rtx x = gen_rtx_MEM (DECL_MODE (var),
2693 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2694 tree addr;
2695 tree type = build_pointer_type (integer_type_node);
2697 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2698 2000));
2700 SET_DECL_RTL (var, x);
2701 TREE_STATIC (var) = 1;
2702 addr = build1 (ADDR_EXPR, type, var);
2703 symbol_cost = computation_cost (addr) + 1;
2705 address_cost
2706 = computation_cost (build2 (PLUS_EXPR, type,
2707 addr,
2708 build_int_cst_type (type, 2000))) + 1;
2709 if (dump_file && (dump_flags & TDF_DETAILS))
2711 fprintf (dump_file, "force_var_cost:\n");
2712 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2713 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2714 fprintf (dump_file, " address %d\n", (int) address_cost);
2715 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2716 fprintf (dump_file, "\n");
2719 costs_initialized = true;
2722 if (depends_on)
2724 fd_ivopts_data = data;
2725 walk_tree (&expr, find_depends, depends_on, NULL);
2728 if (SSA_VAR_P (expr))
2729 return 0;
2731 if (TREE_INVARIANT (expr))
2733 if (TREE_CODE (expr) == INTEGER_CST)
2734 return integer_cost;
2736 if (TREE_CODE (expr) == ADDR_EXPR)
2738 tree obj = TREE_OPERAND (expr, 0);
2740 if (TREE_CODE (obj) == VAR_DECL
2741 || TREE_CODE (obj) == PARM_DECL
2742 || TREE_CODE (obj) == RESULT_DECL)
2743 return symbol_cost;
2746 return address_cost;
2749 switch (TREE_CODE (expr))
2751 case PLUS_EXPR:
2752 case MINUS_EXPR:
2753 case MULT_EXPR:
2754 op0 = TREE_OPERAND (expr, 0);
2755 op1 = TREE_OPERAND (expr, 1);
2757 if (is_gimple_val (op0))
2758 cost0 = 0;
2759 else
2760 cost0 = force_var_cost (data, op0, NULL);
2762 if (is_gimple_val (op1))
2763 cost1 = 0;
2764 else
2765 cost1 = force_var_cost (data, op1, NULL);
2767 break;
2769 default:
2770 /* Just an arbitrary value, FIXME. */
2771 return target_spill_cost;
2774 mode = TYPE_MODE (TREE_TYPE (expr));
2775 switch (TREE_CODE (expr))
2777 case PLUS_EXPR:
2778 case MINUS_EXPR:
2779 cost = add_cost (mode);
2780 break;
2782 case MULT_EXPR:
2783 if (cst_and_fits_in_hwi (op0))
2784 cost = multiply_by_cost (int_cst_value (op0), mode);
2785 else if (cst_and_fits_in_hwi (op1))
2786 cost = multiply_by_cost (int_cst_value (op1), mode);
2787 else
2788 return target_spill_cost;
2789 break;
2791 default:
2792 gcc_unreachable ();
2795 cost += cost0;
2796 cost += cost1;
2798 /* Bound the cost by target_spill_cost. The parts of complicated
2799 computations often are either loop invariant or at least can
2800 be shared between several iv uses, so letting this grow without
2801 limits would not give reasonable results. */
2802 return cost < target_spill_cost ? cost : target_spill_cost;
2805 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2806 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2807 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2808 invariants the computation depends on. */
2810 static unsigned
2811 split_address_cost (struct ivopts_data *data,
2812 tree addr, bool *symbol_present, bool *var_present,
2813 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2815 tree core;
2816 HOST_WIDE_INT bitsize;
2817 HOST_WIDE_INT bitpos;
2818 tree toffset;
2819 enum machine_mode mode;
2820 int unsignedp, volatilep;
2822 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
2823 &unsignedp, &volatilep);
2825 if (toffset != 0
2826 || bitpos % BITS_PER_UNIT != 0
2827 || TREE_CODE (core) != VAR_DECL)
2829 *symbol_present = false;
2830 *var_present = true;
2831 fd_ivopts_data = data;
2832 walk_tree (&addr, find_depends, depends_on, NULL);
2833 return target_spill_cost;
2836 *offset += bitpos / BITS_PER_UNIT;
2837 if (TREE_STATIC (core)
2838 || DECL_EXTERNAL (core))
2840 *symbol_present = true;
2841 *var_present = false;
2842 return 0;
2845 *symbol_present = false;
2846 *var_present = true;
2847 return 0;
2850 /* Estimates cost of expressing difference of addresses E1 - E2 as
2851 var + symbol + offset. The value of offset is added to OFFSET,
2852 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2853 part is missing. DEPENDS_ON is a set of the invariants the computation
2854 depends on. */
2856 static unsigned
2857 ptr_difference_cost (struct ivopts_data *data,
2858 tree e1, tree e2, bool *symbol_present, bool *var_present,
2859 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2861 HOST_WIDE_INT diff = 0;
2862 unsigned cost;
2864 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
2866 if (ptr_difference_const (e1, e2, &diff))
2868 *offset += diff;
2869 *symbol_present = false;
2870 *var_present = false;
2871 return 0;
2874 if (e2 == integer_zero_node)
2875 return split_address_cost (data, TREE_OPERAND (e1, 0),
2876 symbol_present, var_present, offset, depends_on);
2878 *symbol_present = false;
2879 *var_present = true;
2881 cost = force_var_cost (data, e1, depends_on);
2882 cost += force_var_cost (data, e2, depends_on);
2883 cost += add_cost (Pmode);
2885 return cost;
2888 /* Estimates cost of expressing difference E1 - E2 as
2889 var + symbol + offset. The value of offset is added to OFFSET,
2890 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2891 part is missing. DEPENDS_ON is a set of the invariants the computation
2892 depends on. */
2894 static unsigned
2895 difference_cost (struct ivopts_data *data,
2896 tree e1, tree e2, bool *symbol_present, bool *var_present,
2897 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2899 unsigned cost;
2900 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
2902 strip_offset (&e1, offset);
2903 *offset = -*offset;
2904 strip_offset (&e2, offset);
2905 *offset = -*offset;
2907 if (TREE_CODE (e1) == ADDR_EXPR)
2908 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
2909 depends_on);
2910 *symbol_present = false;
2912 if (operand_equal_p (e1, e2, 0))
2914 *var_present = false;
2915 return 0;
2917 *var_present = true;
2918 if (zero_p (e2))
2919 return force_var_cost (data, e1, depends_on);
2921 if (zero_p (e1))
2923 cost = force_var_cost (data, e2, depends_on);
2924 cost += multiply_by_cost (-1, mode);
2926 return cost;
2929 cost = force_var_cost (data, e1, depends_on);
2930 cost += force_var_cost (data, e2, depends_on);
2931 cost += add_cost (mode);
2933 return cost;
2936 /* Determines the cost of the computation by that USE is expressed
2937 from induction variable CAND. If ADDRESS_P is true, we just need
2938 to create an address from it, otherwise we want to get it into
2939 register. A set of invariants we depend on is stored in
2940 DEPENDS_ON. AT is the statement at that the value is computed. */
2942 static unsigned
2943 get_computation_cost_at (struct ivopts_data *data,
2944 struct iv_use *use, struct iv_cand *cand,
2945 bool address_p, bitmap *depends_on, tree at)
2947 tree ubase = use->iv->base, ustep = use->iv->step;
2948 tree cbase, cstep;
2949 tree utype = TREE_TYPE (ubase), ctype;
2950 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
2951 HOST_WIDE_INT ratio, aratio;
2952 bool var_present, symbol_present;
2953 unsigned cost = 0, n_sums;
2955 *depends_on = NULL;
2957 /* Only consider real candidates. */
2958 if (!cand->iv)
2959 return INFTY;
2961 cbase = cand->iv->base;
2962 cstep = cand->iv->step;
2963 ctype = TREE_TYPE (cbase);
2965 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2967 /* We do not have a precision to express the values of use. */
2968 return INFTY;
2971 if (address_p)
2973 /* Do not try to express address of an object with computation based
2974 on address of a different object. This may cause problems in rtl
2975 level alias analysis (that does not expect this to be happening,
2976 as this is illegal in C), and would be unlikely to be useful
2977 anyway. */
2978 if (use->iv->base_object
2979 && cand->iv->base_object
2980 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
2981 return INFTY;
2984 if (!cst_and_fits_in_hwi (ustep)
2985 || !cst_and_fits_in_hwi (cstep))
2986 return INFTY;
2988 if (TREE_CODE (ubase) == INTEGER_CST
2989 && !cst_and_fits_in_hwi (ubase))
2990 goto fallback;
2992 if (TREE_CODE (cbase) == INTEGER_CST
2993 && !cst_and_fits_in_hwi (cbase))
2994 goto fallback;
2996 ustepi = int_cst_value (ustep);
2997 cstepi = int_cst_value (cstep);
2999 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3001 /* TODO -- add direct handling of this case. */
3002 goto fallback;
3005 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3006 return INFTY;
3008 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3009 or ratio == 1, it is better to handle this like
3011 ubase - ratio * cbase + ratio * var
3013 (also holds in the case ratio == -1, TODO. */
3015 if (TREE_CODE (cbase) == INTEGER_CST)
3017 offset = - ratio * int_cst_value (cbase);
3018 cost += difference_cost (data,
3019 ubase, integer_zero_node,
3020 &symbol_present, &var_present, &offset,
3021 depends_on);
3023 else if (ratio == 1)
3025 cost += difference_cost (data,
3026 ubase, cbase,
3027 &symbol_present, &var_present, &offset,
3028 depends_on);
3030 else
3032 cost += force_var_cost (data, cbase, depends_on);
3033 cost += add_cost (TYPE_MODE (ctype));
3034 cost += difference_cost (data,
3035 ubase, integer_zero_node,
3036 &symbol_present, &var_present, &offset,
3037 depends_on);
3040 /* If we are after the increment, the value of the candidate is higher by
3041 one iteration. */
3042 if (stmt_after_increment (data->current_loop, cand, at))
3043 offset -= ratio * cstepi;
3045 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3046 (symbol/var/const parts may be omitted). If we are looking for an address,
3047 find the cost of addressing this. */
3048 if (address_p)
3049 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3051 /* Otherwise estimate the costs for computing the expression. */
3052 aratio = ratio > 0 ? ratio : -ratio;
3053 if (!symbol_present && !var_present && !offset)
3055 if (ratio != 1)
3056 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3058 return cost;
3061 if (aratio != 1)
3062 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3064 n_sums = 1;
3065 if (var_present
3066 /* Symbol + offset should be compile-time computable. */
3067 && (symbol_present || offset))
3068 n_sums++;
3070 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3072 fallback:
3074 /* Just get the expression, expand it and measure the cost. */
3075 tree comp = get_computation_at (data->current_loop, use, cand, at);
3077 if (!comp)
3078 return INFTY;
3080 if (address_p)
3081 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3083 return computation_cost (comp);
3087 /* Determines the cost of the computation by that USE is expressed
3088 from induction variable CAND. If ADDRESS_P is true, we just need
3089 to create an address from it, otherwise we want to get it into
3090 register. A set of invariants we depend on is stored in
3091 DEPENDS_ON. */
3093 static unsigned
3094 get_computation_cost (struct ivopts_data *data,
3095 struct iv_use *use, struct iv_cand *cand,
3096 bool address_p, bitmap *depends_on)
3098 return get_computation_cost_at (data,
3099 use, cand, address_p, depends_on, use->stmt);
3102 /* Determines cost of basing replacement of USE on CAND in a generic
3103 expression. */
3105 static bool
3106 determine_use_iv_cost_generic (struct ivopts_data *data,
3107 struct iv_use *use, struct iv_cand *cand)
3109 bitmap depends_on;
3110 unsigned cost = get_computation_cost (data, use, cand, false, &depends_on);
3112 set_use_iv_cost (data, use, cand, cost, depends_on);
3114 return cost != INFTY;
3117 /* Determines cost of basing replacement of USE on CAND in an address. */
3119 static bool
3120 determine_use_iv_cost_address (struct ivopts_data *data,
3121 struct iv_use *use, struct iv_cand *cand)
3123 bitmap depends_on;
3124 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3126 set_use_iv_cost (data, use, cand, cost, depends_on);
3128 return cost != INFTY;
3131 /* Computes value of induction variable IV in iteration NITER. */
3133 static tree
3134 iv_value (struct iv *iv, tree niter)
3136 tree val;
3137 tree type = TREE_TYPE (iv->base);
3139 niter = fold_convert (type, niter);
3140 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3142 return fold (build2 (PLUS_EXPR, type, iv->base, val));
3145 /* Computes value of candidate CAND at position AT in iteration NITER. */
3147 static tree
3148 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3150 tree val = iv_value (cand->iv, niter);
3151 tree type = TREE_TYPE (cand->iv->base);
3153 if (stmt_after_increment (loop, cand, at))
3154 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3156 return val;
3159 /* Check whether it is possible to express the condition in USE by comparison
3160 of candidate CAND. If so, store the comparison code to COMPARE and the
3161 value compared with to BOUND. */
3163 static bool
3164 may_eliminate_iv (struct loop *loop,
3165 struct iv_use *use, struct iv_cand *cand,
3166 enum tree_code *compare, tree *bound)
3168 basic_block ex_bb;
3169 edge exit;
3170 struct tree_niter_desc niter, new_niter;
3171 tree wider_type, type, base;
3173 /* For now works only for exits that dominate the loop latch. TODO -- extend
3174 for other conditions inside loop body. */
3175 ex_bb = bb_for_stmt (use->stmt);
3176 if (use->stmt != last_stmt (ex_bb)
3177 || TREE_CODE (use->stmt) != COND_EXPR)
3178 return false;
3179 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3180 return false;
3182 exit = EDGE_SUCC (ex_bb, 0);
3183 if (flow_bb_inside_loop_p (loop, exit->dest))
3184 exit = EDGE_SUCC (ex_bb, 1);
3185 if (flow_bb_inside_loop_p (loop, exit->dest))
3186 return false;
3188 niter.niter = NULL_TREE;
3189 number_of_iterations_exit (loop, exit, &niter);
3190 if (!niter.niter
3191 || !integer_nonzerop (niter.assumptions)
3192 || !integer_zerop (niter.may_be_zero))
3193 return false;
3195 if (exit->flags & EDGE_TRUE_VALUE)
3196 *compare = EQ_EXPR;
3197 else
3198 *compare = NE_EXPR;
3200 *bound = cand_value_at (loop, cand, use->stmt, niter.niter);
3202 /* Let us check there is not some problem with overflows, by checking that
3203 the number of iterations is unchanged. */
3204 base = cand->iv->base;
3205 type = TREE_TYPE (base);
3206 if (stmt_after_increment (loop, cand, use->stmt))
3207 base = fold (build2 (PLUS_EXPR, type, base, cand->iv->step));
3209 new_niter.niter = NULL_TREE;
3210 number_of_iterations_cond (TREE_TYPE (cand->iv->base), base,
3211 cand->iv->step, NE_EXPR, *bound, NULL_TREE,
3212 &new_niter);
3213 if (!new_niter.niter
3214 || !integer_nonzerop (new_niter.assumptions)
3215 || !integer_zerop (new_niter.may_be_zero))
3216 return false;
3218 wider_type = TREE_TYPE (new_niter.niter);
3219 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (TREE_TYPE (niter.niter)))
3220 wider_type = TREE_TYPE (niter.niter);
3221 if (!operand_equal_p (fold_convert (wider_type, niter.niter),
3222 fold_convert (wider_type, new_niter.niter), 0))
3223 return false;
3225 return true;
3228 /* Determines cost of basing replacement of USE on CAND in a condition. */
3230 static bool
3231 determine_use_iv_cost_condition (struct ivopts_data *data,
3232 struct iv_use *use, struct iv_cand *cand)
3234 tree bound;
3235 enum tree_code compare;
3237 /* Only consider real candidates. */
3238 if (!cand->iv)
3240 set_use_iv_cost (data, use, cand, INFTY, NULL);
3241 return false;
3244 if (may_eliminate_iv (data->current_loop, use, cand, &compare, &bound))
3246 bitmap depends_on = NULL;
3247 unsigned cost = force_var_cost (data, bound, &depends_on);
3249 set_use_iv_cost (data, use, cand, cost, depends_on);
3250 return cost != INFTY;
3253 /* The induction variable elimination failed; just express the original
3254 giv. If it is compared with an invariant, note that we cannot get
3255 rid of it. */
3256 if (TREE_CODE (*use->op_p) == SSA_NAME)
3257 record_invariant (data, *use->op_p, true);
3258 else
3260 record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
3261 record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
3264 return determine_use_iv_cost_generic (data, use, cand);
3267 /* Checks whether it is possible to replace the final value of USE by
3268 a direct computation. If so, the formula is stored to *VALUE. */
3270 static bool
3271 may_replace_final_value (struct loop *loop, struct iv_use *use, tree *value)
3273 edge exit;
3274 struct tree_niter_desc *niter;
3276 exit = single_dom_exit (loop);
3277 if (!exit)
3278 return false;
3280 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3281 bb_for_stmt (use->stmt)));
3283 niter = &loop_data (loop)->niter;
3284 if (!niter->niter
3285 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
3286 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
3287 return false;
3289 *value = iv_value (use->iv, niter->niter);
3291 return true;
3294 /* Determines cost of replacing final value of USE using CAND. */
3296 static bool
3297 determine_use_iv_cost_outer (struct ivopts_data *data,
3298 struct iv_use *use, struct iv_cand *cand)
3300 bitmap depends_on;
3301 unsigned cost;
3302 edge exit;
3303 tree value;
3304 struct loop *loop = data->current_loop;
3306 if (!cand->iv)
3308 if (!may_replace_final_value (loop, use, &value))
3310 set_use_iv_cost (data, use, cand, INFTY, NULL);
3311 return false;
3314 depends_on = NULL;
3315 cost = force_var_cost (data, value, &depends_on);
3317 cost /= AVG_LOOP_NITER (loop);
3319 set_use_iv_cost (data, use, cand, cost, depends_on);
3320 return cost != INFTY;
3323 exit = single_dom_exit (loop);
3324 if (exit)
3326 /* If there is just a single exit, we may use value of the candidate
3327 after we take it to determine the value of use. */
3328 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3329 last_stmt (exit->src));
3330 if (cost != INFTY)
3331 cost /= AVG_LOOP_NITER (loop);
3333 else
3335 /* Otherwise we just need to compute the iv. */
3336 cost = get_computation_cost (data, use, cand, false, &depends_on);
3339 set_use_iv_cost (data, use, cand, cost, depends_on);
3341 return cost != INFTY;
3344 /* Determines cost of basing replacement of USE on CAND. Returns false
3345 if USE cannot be based on CAND. */
3347 static bool
3348 determine_use_iv_cost (struct ivopts_data *data,
3349 struct iv_use *use, struct iv_cand *cand)
3351 switch (use->type)
3353 case USE_NONLINEAR_EXPR:
3354 return determine_use_iv_cost_generic (data, use, cand);
3356 case USE_OUTER:
3357 return determine_use_iv_cost_outer (data, use, cand);
3359 case USE_ADDRESS:
3360 return determine_use_iv_cost_address (data, use, cand);
3362 case USE_COMPARE:
3363 return determine_use_iv_cost_condition (data, use, cand);
3365 default:
3366 gcc_unreachable ();
3370 /* Determines costs of basing the use of the iv on an iv candidate. */
3372 static void
3373 determine_use_iv_costs (struct ivopts_data *data)
3375 unsigned i, j;
3376 struct iv_use *use;
3377 struct iv_cand *cand;
3378 bitmap to_clear = BITMAP_XMALLOC ();
3380 alloc_use_cost_map (data);
3382 for (i = 0; i < n_iv_uses (data); i++)
3384 use = iv_use (data, i);
3386 if (data->consider_all_candidates)
3388 for (j = 0; j < n_iv_cands (data); j++)
3390 cand = iv_cand (data, j);
3391 determine_use_iv_cost (data, use, cand);
3394 else
3396 bitmap_iterator bi;
3398 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3400 cand = iv_cand (data, j);
3401 if (!determine_use_iv_cost (data, use, cand))
3402 bitmap_set_bit (to_clear, j);
3405 /* Remove the candidates for that the cost is infinite from
3406 the list of related candidates. */
3407 bitmap_and_compl_into (use->related_cands, to_clear);
3408 bitmap_clear (to_clear);
3412 BITMAP_XFREE (to_clear);
3414 if (dump_file && (dump_flags & TDF_DETAILS))
3416 fprintf (dump_file, "Use-candidate costs:\n");
3418 for (i = 0; i < n_iv_uses (data); i++)
3420 use = iv_use (data, i);
3422 fprintf (dump_file, "Use %d:\n", i);
3423 fprintf (dump_file, " cand\tcost\tdepends on\n");
3424 for (j = 0; j < use->n_map_members; j++)
3426 if (!use->cost_map[j].cand
3427 || use->cost_map[j].cost == INFTY)
3428 continue;
3430 fprintf (dump_file, " %d\t%d\t",
3431 use->cost_map[j].cand->id,
3432 use->cost_map[j].cost);
3433 if (use->cost_map[j].depends_on)
3434 bitmap_print (dump_file,
3435 use->cost_map[j].depends_on, "","");
3436 fprintf (dump_file, "\n");
3439 fprintf (dump_file, "\n");
3441 fprintf (dump_file, "\n");
3445 /* Determines cost of the candidate CAND. */
3447 static void
3448 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3450 unsigned cost_base, cost_step;
3451 tree base, last;
3452 basic_block bb;
3454 if (!cand->iv)
3456 cand->cost = 0;
3457 return;
3460 /* There are two costs associated with the candidate -- its increment
3461 and its initialization. The second is almost negligible for any loop
3462 that rolls enough, so we take it just very little into account. */
3464 base = cand->iv->base;
3465 cost_base = force_var_cost (data, base, NULL);
3466 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3468 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3470 /* Prefer the original iv unless we may gain something by replacing it. */
3471 if (cand->pos == IP_ORIGINAL)
3472 cand->cost--;
3474 /* Prefer not to insert statements into latch unless there are some
3475 already (so that we do not create unnecessary jumps). */
3476 if (cand->pos == IP_END)
3478 bb = ip_end_pos (data->current_loop);
3479 last = last_stmt (bb);
3481 if (!last
3482 || TREE_CODE (last) == LABEL_EXPR)
3483 cand->cost++;
3487 /* Determines costs of computation of the candidates. */
3489 static void
3490 determine_iv_costs (struct ivopts_data *data)
3492 unsigned i;
3494 if (dump_file && (dump_flags & TDF_DETAILS))
3496 fprintf (dump_file, "Candidate costs:\n");
3497 fprintf (dump_file, " cand\tcost\n");
3500 for (i = 0; i < n_iv_cands (data); i++)
3502 struct iv_cand *cand = iv_cand (data, i);
3504 determine_iv_cost (data, cand);
3506 if (dump_file && (dump_flags & TDF_DETAILS))
3507 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3510 if (dump_file && (dump_flags & TDF_DETAILS))
3511 fprintf (dump_file, "\n");
3514 /* Calculates cost for having SIZE induction variables. */
3516 static unsigned
3517 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3519 return global_cost_for_size (size,
3520 loop_data (data->current_loop)->regs_used,
3521 n_iv_uses (data));
3524 /* For each size of the induction variable set determine the penalty. */
3526 static void
3527 determine_set_costs (struct ivopts_data *data)
3529 unsigned j, n;
3530 tree phi, op;
3531 struct loop *loop = data->current_loop;
3532 bitmap_iterator bi;
3534 /* We use the following model (definitely improvable, especially the
3535 cost function -- TODO):
3537 We estimate the number of registers available (using MD data), name it A.
3539 We estimate the number of registers used by the loop, name it U. This
3540 number is obtained as the number of loop phi nodes (not counting virtual
3541 registers and bivs) + the number of variables from outside of the loop.
3543 We set a reserve R (free regs that are used for temporary computations,
3544 etc.). For now the reserve is a constant 3.
3546 Let I be the number of induction variables.
3548 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3549 make a lot of ivs without a reason).
3550 -- if A - R < U + I <= A, the cost is I * PRES_COST
3551 -- if U + I > A, the cost is I * PRES_COST and
3552 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3554 if (dump_file && (dump_flags & TDF_DETAILS))
3556 fprintf (dump_file, "Global costs:\n");
3557 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3558 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3559 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3560 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3563 n = 0;
3564 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3566 op = PHI_RESULT (phi);
3568 if (!is_gimple_reg (op))
3569 continue;
3571 if (get_iv (data, op))
3572 continue;
3574 n++;
3577 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3579 struct version_info *info = ver_info (data, j);
3581 if (info->inv_id && info->has_nonlin_use)
3582 n++;
3585 loop_data (loop)->regs_used = n;
3586 if (dump_file && (dump_flags & TDF_DETAILS))
3587 fprintf (dump_file, " regs_used %d\n", n);
3589 if (dump_file && (dump_flags & TDF_DETAILS))
3591 fprintf (dump_file, " cost for size:\n");
3592 fprintf (dump_file, " ivs\tcost\n");
3593 for (j = 0; j <= 2 * target_avail_regs; j++)
3594 fprintf (dump_file, " %d\t%d\n", j,
3595 ivopts_global_cost_for_size (data, j));
3596 fprintf (dump_file, "\n");
3600 /* Returns true if A is a cheaper cost pair than B. */
3602 static bool
3603 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3605 if (!a)
3606 return false;
3608 if (!b)
3609 return true;
3611 if (a->cost < b->cost)
3612 return true;
3614 if (a->cost > b->cost)
3615 return false;
3617 /* In case the costs are the same, prefer the cheaper candidate. */
3618 if (a->cand->cost < b->cand->cost)
3619 return true;
3621 return false;
3624 /* Computes the cost field of IVS structure. */
3626 static void
3627 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
3629 unsigned cost = 0;
3631 cost += ivs->cand_use_cost;
3632 cost += ivs->cand_cost;
3633 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
3635 ivs->cost = cost;
3638 /* Set USE not to be expressed by any candidate in IVS. */
3640 static void
3641 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
3642 struct iv_use *use)
3644 unsigned uid = use->id, cid, iid;
3645 bitmap deps;
3646 struct cost_pair *cp;
3647 bitmap_iterator bi;
3649 cp = ivs->cand_for_use[uid];
3650 if (!cp)
3651 return;
3652 cid = cp->cand->id;
3654 ivs->bad_uses++;
3655 ivs->cand_for_use[uid] = NULL;
3656 ivs->n_cand_uses[cid]--;
3658 if (ivs->n_cand_uses[cid] == 0)
3660 bitmap_clear_bit (ivs->cands, cid);
3661 /* Do not count the pseudocandidates. */
3662 if (cp->cand->iv)
3663 ivs->n_regs--;
3664 ivs->cand_cost -= cp->cand->cost;
3667 ivs->cand_use_cost -= cp->cost;
3669 deps = cp->depends_on;
3671 if (deps)
3673 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3675 ivs->n_invariant_uses[iid]--;
3676 if (ivs->n_invariant_uses[iid] == 0)
3677 ivs->n_regs--;
3681 iv_ca_recount_cost (data, ivs);
3684 /* Set cost pair for USE in set IVS to CP. */
3686 static void
3687 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
3688 struct iv_use *use, struct cost_pair *cp)
3690 unsigned uid = use->id, cid, iid;
3691 bitmap deps;
3692 bitmap_iterator bi;
3694 if (ivs->cand_for_use[uid] == cp)
3695 return;
3697 if (ivs->cand_for_use[uid])
3698 iv_ca_set_no_cp (data, ivs, use);
3700 if (cp)
3702 cid = cp->cand->id;
3704 ivs->bad_uses--;
3705 ivs->cand_for_use[uid] = cp;
3706 ivs->n_cand_uses[cid]++;
3707 if (ivs->n_cand_uses[cid] == 1)
3709 bitmap_set_bit (ivs->cands, cid);
3710 /* Do not count the pseudocandidates. */
3711 if (cp->cand->iv)
3712 ivs->n_regs++;
3713 ivs->cand_cost += cp->cand->cost;
3716 ivs->cand_use_cost += cp->cost;
3718 deps = cp->depends_on;
3720 if (deps)
3722 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3724 ivs->n_invariant_uses[iid]++;
3725 if (ivs->n_invariant_uses[iid] == 1)
3726 ivs->n_regs++;
3730 iv_ca_recount_cost (data, ivs);
3734 /* Extend set IVS by expressing USE by some of the candidates in it
3735 if possible. */
3737 static void
3738 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
3739 struct iv_use *use)
3741 struct cost_pair *best_cp = NULL, *cp;
3742 bitmap_iterator bi;
3743 unsigned i;
3745 gcc_assert (ivs->upto >= use->id);
3747 if (ivs->upto == use->id)
3749 ivs->upto++;
3750 ivs->bad_uses++;
3753 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
3755 cp = get_use_iv_cost (data, use, iv_cand (data, i));
3757 if (cheaper_cost_pair (cp, best_cp))
3758 best_cp = cp;
3761 iv_ca_set_cp (data, ivs, use, best_cp);
3764 /* Get cost for assignment IVS. */
3766 static unsigned
3767 iv_ca_cost (struct iv_ca *ivs)
3769 return (ivs->bad_uses ? INFTY : ivs->cost);
3772 /* Returns true if all dependences of CP are among invariants in IVS. */
3774 static bool
3775 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
3777 unsigned i;
3778 bitmap_iterator bi;
3780 if (!cp->depends_on)
3781 return true;
3783 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
3785 if (ivs->n_invariant_uses[i] == 0)
3786 return false;
3789 return true;
3792 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
3793 it before NEXT_CHANGE. */
3795 static struct iv_ca_delta *
3796 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
3797 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
3799 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
3801 change->use = use;
3802 change->old_cp = old_cp;
3803 change->new_cp = new_cp;
3804 change->next_change = next_change;
3806 return change;
3809 /* Returns candidate by that USE is expressed in IVS. */
3811 static struct cost_pair *
3812 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
3814 return ivs->cand_for_use[use->id];
3817 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
3818 reverted instead. */
3820 static void
3821 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
3822 struct iv_ca_delta *delta, bool forward)
3824 struct cost_pair *from, *to;
3826 for (; delta; delta = delta->next_change)
3828 if (forward)
3830 from = delta->old_cp;
3831 to = delta->new_cp;
3833 else
3835 from = delta->new_cp;
3836 to = delta->old_cp;
3839 gcc_assert (iv_ca_cand_for_use (ivs, delta->use) == from);
3840 iv_ca_set_cp (data, ivs, delta->use, to);
3844 /* Returns true if CAND is used in IVS. */
3846 static bool
3847 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
3849 return ivs->n_cand_uses[cand->id] > 0;
3852 /* Free the list of changes DELTA. */
3854 static void
3855 iv_ca_delta_free (struct iv_ca_delta **delta)
3857 struct iv_ca_delta *act, *next;
3859 for (act = *delta; act; act = next)
3861 next = act->next_change;
3862 free (act);
3865 *delta = NULL;
3868 /* Allocates new iv candidates assignment. */
3870 static struct iv_ca *
3871 iv_ca_new (struct ivopts_data *data)
3873 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
3875 nw->upto = 0;
3876 nw->bad_uses = 0;
3877 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
3878 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
3879 nw->cands = BITMAP_XMALLOC ();
3880 nw->n_regs = 0;
3881 nw->cand_use_cost = 0;
3882 nw->cand_cost = 0;
3883 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
3884 nw->cost = 0;
3886 return nw;
3889 /* Free memory occupied by the set IVS. */
3891 static void
3892 iv_ca_free (struct iv_ca **ivs)
3894 free ((*ivs)->cand_for_use);
3895 free ((*ivs)->n_cand_uses);
3896 BITMAP_XFREE ((*ivs)->cands);
3897 free ((*ivs)->n_invariant_uses);
3898 free (*ivs);
3899 *ivs = NULL;
3902 /* Dumps IVS to FILE. */
3904 static void
3905 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
3907 const char *pref = " invariants ";
3908 unsigned i;
3910 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
3911 bitmap_print (file, ivs->cands, " candidates ","\n");
3913 for (i = 1; i <= data->max_inv_id; i++)
3914 if (ivs->n_invariant_uses[i])
3916 fprintf (file, "%s%d", pref, i);
3917 pref = ", ";
3919 fprintf (file, "\n");
3922 /* Try changing candidate in IVS to CAND for each use. Return cost of the
3923 new set, and store differences in DELTA. */
3925 static unsigned
3926 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
3927 struct iv_cand *cand, struct iv_ca_delta **delta)
3929 unsigned i, cost;
3930 struct iv_use *use;
3931 struct cost_pair *old_cp, *new_cp;
3933 *delta = NULL;
3934 for (i = 0; i < ivs->upto; i++)
3936 use = iv_use (data, i);
3937 old_cp = iv_ca_cand_for_use (ivs, use);
3939 if (old_cp
3940 && old_cp->cand == cand)
3941 continue;
3943 new_cp = get_use_iv_cost (data, use, cand);
3944 if (!new_cp)
3945 continue;
3947 if (!iv_ca_has_deps (ivs, new_cp))
3948 continue;
3950 if (!cheaper_cost_pair (new_cp, old_cp))
3951 continue;
3953 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
3956 iv_ca_delta_commit (data, ivs, *delta, true);
3957 cost = iv_ca_cost (ivs);
3958 iv_ca_delta_commit (data, ivs, *delta, false);
3960 return cost;
3963 /* Try narrowing set IVS by removing CAND. Return the cost of
3964 the new set and store the differences in DELTA. */
3966 static unsigned
3967 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
3968 struct iv_cand *cand, struct iv_ca_delta **delta)
3970 unsigned i, ci;
3971 struct iv_use *use;
3972 struct cost_pair *old_cp, *new_cp, *cp;
3973 bitmap_iterator bi;
3974 struct iv_cand *cnd;
3975 unsigned cost;
3977 *delta = NULL;
3978 for (i = 0; i < n_iv_uses (data); i++)
3980 use = iv_use (data, i);
3982 old_cp = iv_ca_cand_for_use (ivs, use);
3983 if (old_cp->cand != cand)
3984 continue;
3986 new_cp = NULL;
3988 if (data->consider_all_candidates)
3990 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
3992 if (ci == cand->id)
3993 continue;
3995 cnd = iv_cand (data, ci);
3997 cp = get_use_iv_cost (data, use, cnd);
3998 if (!cp)
3999 continue;
4000 if (!iv_ca_has_deps (ivs, cp))
4001 continue;
4003 if (!cheaper_cost_pair (cp, new_cp))
4004 continue;
4006 new_cp = cp;
4009 else
4011 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4013 if (ci == cand->id)
4014 continue;
4016 cnd = iv_cand (data, ci);
4018 cp = get_use_iv_cost (data, use, cnd);
4019 if (!cp)
4020 continue;
4021 if (!iv_ca_has_deps (ivs, cp))
4022 continue;
4024 if (!cheaper_cost_pair (cp, new_cp))
4025 continue;
4027 new_cp = cp;
4031 if (!new_cp)
4033 iv_ca_delta_free (delta);
4034 return INFTY;
4037 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4040 iv_ca_delta_commit (data, ivs, *delta, true);
4041 cost = iv_ca_cost (ivs);
4042 iv_ca_delta_commit (data, ivs, *delta, false);
4044 return cost;
4047 /* Tries to extend the sets IVS in the best possible way in order
4048 to express the USE. */
4050 static bool
4051 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4052 struct iv_use *use)
4054 unsigned best_cost, act_cost;
4055 unsigned i;
4056 bitmap_iterator bi;
4057 struct iv_cand *cand;
4058 struct iv_ca_delta *best_delta = NULL, *act_delta;
4059 struct cost_pair *cp;
4061 iv_ca_add_use (data, ivs, use);
4062 best_cost = iv_ca_cost (ivs);
4064 cp = iv_ca_cand_for_use (ivs, use);
4065 if (cp)
4067 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4068 iv_ca_set_no_cp (data, ivs, use);
4071 /* First try important candidates. Only if it fails, try the specific ones.
4072 Rationale -- in loops with many variables the best choice often is to use
4073 just one generic biv. If we added here many ivs specific to the uses,
4074 the optimization algorithm later would be likely to get stuck in a local
4075 minimum, thus causing us to create too many ivs. The approach from
4076 few ivs to more seems more likely to be successful -- starting from few
4077 ivs, replacing an expensive use by a specific iv should always be a
4078 win. */
4079 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4081 cand = iv_cand (data, i);
4083 if (iv_ca_cand_used_p (ivs, cand))
4084 continue;
4086 cp = get_use_iv_cost (data, use, cand);
4087 if (!cp)
4088 continue;
4090 iv_ca_set_cp (data, ivs, use, cp);
4091 act_cost = iv_ca_extend (data, ivs, cand, &act_delta);
4092 iv_ca_set_no_cp (data, ivs, use);
4093 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4095 if (act_cost < best_cost)
4097 best_cost = act_cost;
4099 iv_ca_delta_free (&best_delta);
4100 best_delta = act_delta;
4102 else
4103 iv_ca_delta_free (&act_delta);
4106 if (best_cost == INFTY)
4108 for (i = 0; i < use->n_map_members; i++)
4110 cp = use->cost_map + i;
4111 cand = cp->cand;
4112 if (!cand)
4113 continue;
4115 /* Already tried this. */
4116 if (cand->important)
4117 continue;
4119 if (iv_ca_cand_used_p (ivs, cand))
4120 continue;
4122 act_delta = NULL;
4123 iv_ca_set_cp (data, ivs, use, cp);
4124 act_cost = iv_ca_extend (data, ivs, cand, &act_delta);
4125 iv_ca_set_no_cp (data, ivs, use);
4126 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4127 cp, act_delta);
4129 if (act_cost < best_cost)
4131 best_cost = act_cost;
4133 if (best_delta)
4134 iv_ca_delta_free (&best_delta);
4135 best_delta = act_delta;
4137 else
4138 iv_ca_delta_free (&act_delta);
4142 iv_ca_delta_commit (data, ivs, best_delta, true);
4143 iv_ca_delta_free (&best_delta);
4145 return (best_cost != INFTY);
4148 /* Finds an initial assignment of candidates to uses. */
4150 static struct iv_ca *
4151 get_initial_solution (struct ivopts_data *data)
4153 struct iv_ca *ivs = iv_ca_new (data);
4154 unsigned i;
4156 for (i = 0; i < n_iv_uses (data); i++)
4157 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4159 iv_ca_free (&ivs);
4160 return NULL;
4163 return ivs;
4166 /* Tries to improve set of induction variables IVS. */
4168 static bool
4169 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4171 unsigned i, acost, best_cost = iv_ca_cost (ivs);
4172 struct iv_ca_delta *best_delta = NULL, *act_delta;
4173 struct iv_cand *cand;
4175 /* Try altering the set of induction variables by one. */
4176 for (i = 0; i < n_iv_cands (data); i++)
4178 cand = iv_cand (data, i);
4180 if (iv_ca_cand_used_p (ivs, cand))
4181 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4182 else
4183 acost = iv_ca_extend (data, ivs, cand, &act_delta);
4185 if (acost < best_cost)
4187 best_cost = acost;
4188 if (best_delta)
4189 iv_ca_delta_free (&best_delta);
4190 best_delta = act_delta;
4192 else
4193 iv_ca_delta_free (&act_delta);
4196 if (!best_delta)
4197 return false;
4199 iv_ca_delta_commit (data, ivs, best_delta, true);
4200 iv_ca_delta_free (&best_delta);
4201 return true;
4204 /* Attempts to find the optimal set of induction variables. We do simple
4205 greedy heuristic -- we try to replace at most one candidate in the selected
4206 solution and remove the unused ivs while this improves the cost. */
4208 static struct iv_ca *
4209 find_optimal_iv_set (struct ivopts_data *data)
4211 unsigned i;
4212 struct iv_ca *set;
4213 struct iv_use *use;
4215 /* Get the initial solution. */
4216 set = get_initial_solution (data);
4217 if (!set)
4219 if (dump_file && (dump_flags & TDF_DETAILS))
4220 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4221 return NULL;
4224 if (dump_file && (dump_flags & TDF_DETAILS))
4226 fprintf (dump_file, "Initial set of candidates:\n");
4227 iv_ca_dump (data, dump_file, set);
4230 while (try_improve_iv_set (data, set))
4232 if (dump_file && (dump_flags & TDF_DETAILS))
4234 fprintf (dump_file, "Improved to:\n");
4235 iv_ca_dump (data, dump_file, set);
4239 if (dump_file && (dump_flags & TDF_DETAILS))
4240 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
4242 for (i = 0; i < n_iv_uses (data); i++)
4244 use = iv_use (data, i);
4245 use->selected = iv_ca_cand_for_use (set, use)->cand;
4248 return set;
4251 /* Creates a new induction variable corresponding to CAND. */
4253 static void
4254 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4256 block_stmt_iterator incr_pos;
4257 tree base;
4258 bool after = false;
4260 if (!cand->iv)
4261 return;
4263 switch (cand->pos)
4265 case IP_NORMAL:
4266 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4267 break;
4269 case IP_END:
4270 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4271 after = true;
4272 break;
4274 case IP_ORIGINAL:
4275 /* Mark that the iv is preserved. */
4276 name_info (data, cand->var_before)->preserve_biv = true;
4277 name_info (data, cand->var_after)->preserve_biv = true;
4279 /* Rewrite the increment so that it uses var_before directly. */
4280 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4282 return;
4285 gimple_add_tmp_var (cand->var_before);
4286 add_referenced_tmp_var (cand->var_before);
4288 base = unshare_expr (cand->iv->base);
4290 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
4291 &incr_pos, after, &cand->var_before, &cand->var_after);
4294 /* Creates new induction variables described in SET. */
4296 static void
4297 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4299 unsigned i;
4300 struct iv_cand *cand;
4301 bitmap_iterator bi;
4303 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4305 cand = iv_cand (data, i);
4306 create_new_iv (data, cand);
4310 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4311 is true, remove also the ssa name defined by the statement. */
4313 static void
4314 remove_statement (tree stmt, bool including_defined_name)
4316 if (TREE_CODE (stmt) == PHI_NODE)
4318 if (!including_defined_name)
4320 /* Prevent the ssa name defined by the statement from being removed. */
4321 SET_PHI_RESULT (stmt, NULL);
4323 remove_phi_node (stmt, NULL_TREE, bb_for_stmt (stmt));
4325 else
4327 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4329 bsi_remove (&bsi);
4333 /* Rewrites USE (definition of iv used in a nonlinear expression)
4334 using candidate CAND. */
4336 static void
4337 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4338 struct iv_use *use, struct iv_cand *cand)
4340 tree comp = unshare_expr (get_computation (data->current_loop,
4341 use, cand));
4342 tree op, stmts, tgt, ass;
4343 block_stmt_iterator bsi, pbsi;
4345 switch (TREE_CODE (use->stmt))
4347 case PHI_NODE:
4348 tgt = PHI_RESULT (use->stmt);
4350 /* If we should keep the biv, do not replace it. */
4351 if (name_info (data, tgt)->preserve_biv)
4352 return;
4354 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4355 while (!bsi_end_p (pbsi)
4356 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4358 bsi = pbsi;
4359 bsi_next (&pbsi);
4361 break;
4363 case MODIFY_EXPR:
4364 tgt = TREE_OPERAND (use->stmt, 0);
4365 bsi = bsi_for_stmt (use->stmt);
4366 break;
4368 default:
4369 gcc_unreachable ();
4372 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4374 if (TREE_CODE (use->stmt) == PHI_NODE)
4376 if (stmts)
4377 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4378 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
4379 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4380 remove_statement (use->stmt, false);
4381 SSA_NAME_DEF_STMT (tgt) = ass;
4383 else
4385 if (stmts)
4386 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4387 TREE_OPERAND (use->stmt, 1) = op;
4391 /* Replaces ssa name in index IDX by its basic variable. Callback for
4392 for_each_index. */
4394 static bool
4395 idx_remove_ssa_names (tree base, tree *idx,
4396 void *data ATTRIBUTE_UNUSED)
4398 tree *op;
4400 if (TREE_CODE (*idx) == SSA_NAME)
4401 *idx = SSA_NAME_VAR (*idx);
4403 if (TREE_CODE (base) == ARRAY_REF)
4405 op = &TREE_OPERAND (base, 2);
4406 if (*op
4407 && TREE_CODE (*op) == SSA_NAME)
4408 *op = SSA_NAME_VAR (*op);
4409 op = &TREE_OPERAND (base, 3);
4410 if (*op
4411 && TREE_CODE (*op) == SSA_NAME)
4412 *op = SSA_NAME_VAR (*op);
4415 return true;
4418 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4420 static tree
4421 unshare_and_remove_ssa_names (tree ref)
4423 ref = unshare_expr (ref);
4424 for_each_index (&ref, idx_remove_ssa_names, NULL);
4426 return ref;
4429 /* Rewrites base of memory access OP with expression WITH in statement
4430 pointed to by BSI. */
4432 static void
4433 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
4435 tree bvar, var, new_var, new_name, copy, name;
4436 tree orig;
4438 var = bvar = get_base_address (*op);
4440 if (!var || TREE_CODE (with) != SSA_NAME)
4441 goto do_rewrite;
4443 gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
4444 gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
4445 if (TREE_CODE (var) == INDIRECT_REF)
4446 var = TREE_OPERAND (var, 0);
4447 if (TREE_CODE (var) == SSA_NAME)
4449 name = var;
4450 var = SSA_NAME_VAR (var);
4452 else if (DECL_P (var))
4453 name = NULL_TREE;
4454 else
4455 goto do_rewrite;
4457 if (var_ann (var)->type_mem_tag)
4458 var = var_ann (var)->type_mem_tag;
4460 /* We need to add a memory tag for the variable. But we do not want
4461 to add it to the temporary used for the computations, since this leads
4462 to problems in redundancy elimination when there are common parts
4463 in two computations referring to the different arrays. So we copy
4464 the variable to a new temporary. */
4465 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
4466 if (name)
4467 new_name = duplicate_ssa_name (name, copy);
4468 else
4470 new_var = create_tmp_var (TREE_TYPE (with), "ruatmp");
4471 add_referenced_tmp_var (new_var);
4472 var_ann (new_var)->type_mem_tag = var;
4473 new_name = make_ssa_name (new_var, copy);
4475 TREE_OPERAND (copy, 0) = new_name;
4476 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
4477 with = new_name;
4479 do_rewrite:
4481 orig = NULL_TREE;
4482 gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
4483 gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
4485 if (TREE_CODE (*op) == INDIRECT_REF)
4486 orig = REF_ORIGINAL (*op);
4487 if (!orig)
4488 orig = unshare_and_remove_ssa_names (*op);
4490 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
4492 /* Record the original reference, for purposes of alias analysis. */
4493 REF_ORIGINAL (*op) = orig;
4496 /* Rewrites USE (address that is an iv) using candidate CAND. */
4498 static void
4499 rewrite_use_address (struct ivopts_data *data,
4500 struct iv_use *use, struct iv_cand *cand)
4502 tree comp = unshare_expr (get_computation (data->current_loop,
4503 use, cand));
4504 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4505 tree stmts;
4506 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
4508 if (stmts)
4509 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4511 rewrite_address_base (&bsi, use->op_p, op);
4514 /* Rewrites USE (the condition such that one of the arguments is an iv) using
4515 candidate CAND. */
4517 static void
4518 rewrite_use_compare (struct ivopts_data *data,
4519 struct iv_use *use, struct iv_cand *cand)
4521 tree comp;
4522 tree *op_p, cond, op, stmts, bound;
4523 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4524 enum tree_code compare;
4526 if (may_eliminate_iv (data->current_loop,
4527 use, cand, &compare, &bound))
4529 op = force_gimple_operand (unshare_expr (bound), &stmts,
4530 true, NULL_TREE);
4532 if (stmts)
4533 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4535 *use->op_p = build2 (compare, boolean_type_node,
4536 var_at_stmt (data->current_loop,
4537 cand, use->stmt), op);
4538 modify_stmt (use->stmt);
4539 return;
4542 /* The induction variable elimination failed; just express the original
4543 giv. */
4544 comp = unshare_expr (get_computation (data->current_loop, use, cand));
4546 cond = *use->op_p;
4547 op_p = &TREE_OPERAND (cond, 0);
4548 if (TREE_CODE (*op_p) != SSA_NAME
4549 || zero_p (get_iv (data, *op_p)->step))
4550 op_p = &TREE_OPERAND (cond, 1);
4552 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
4553 if (stmts)
4554 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4556 *op_p = op;
4559 /* Ensure that operand *OP_P may be used at the end of EXIT without
4560 violating loop closed ssa form. */
4562 static void
4563 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
4565 basic_block def_bb;
4566 struct loop *def_loop;
4567 tree phi, use;
4569 use = USE_FROM_PTR (op_p);
4570 if (TREE_CODE (use) != SSA_NAME)
4571 return;
4573 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
4574 if (!def_bb)
4575 return;
4577 def_loop = def_bb->loop_father;
4578 if (flow_bb_inside_loop_p (def_loop, exit->dest))
4579 return;
4581 /* Try finding a phi node that copies the value out of the loop. */
4582 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
4583 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
4584 break;
4586 if (!phi)
4588 /* Create such a phi node. */
4589 tree new_name = duplicate_ssa_name (use, NULL);
4591 phi = create_phi_node (new_name, exit->dest);
4592 SSA_NAME_DEF_STMT (new_name) = phi;
4593 add_phi_arg (phi, use, exit);
4596 SET_USE (op_p, PHI_RESULT (phi));
4599 /* Ensure that operands of STMT may be used at the end of EXIT without
4600 violating loop closed ssa form. */
4602 static void
4603 protect_loop_closed_ssa_form (edge exit, tree stmt)
4605 use_optype uses;
4606 vuse_optype vuses;
4607 v_may_def_optype v_may_defs;
4608 unsigned i;
4610 get_stmt_operands (stmt);
4612 uses = STMT_USE_OPS (stmt);
4613 for (i = 0; i < NUM_USES (uses); i++)
4614 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4616 vuses = STMT_VUSE_OPS (stmt);
4617 for (i = 0; i < NUM_VUSES (vuses); i++)
4618 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
4620 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4621 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
4622 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
4625 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
4626 so that they are emitted on the correct place, and so that the loop closed
4627 ssa form is preserved. */
4629 static void
4630 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
4632 tree_stmt_iterator tsi;
4633 block_stmt_iterator bsi;
4634 tree phi, stmt, def, next;
4636 if (EDGE_COUNT (exit->dest->preds) > 1)
4637 split_loop_exit_edge (exit);
4639 if (TREE_CODE (stmts) == STATEMENT_LIST)
4641 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
4642 protect_loop_closed_ssa_form (exit, tsi_stmt (tsi));
4644 else
4645 protect_loop_closed_ssa_form (exit, stmts);
4647 /* Ensure there is label in exit->dest, so that we can
4648 insert after it. */
4649 tree_block_label (exit->dest);
4650 bsi = bsi_after_labels (exit->dest);
4651 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4653 if (!op)
4654 return;
4656 for (phi = phi_nodes (exit->dest); phi; phi = next)
4658 next = PHI_CHAIN (phi);
4660 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
4662 def = PHI_RESULT (phi);
4663 remove_statement (phi, false);
4664 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
4665 def, op);
4666 SSA_NAME_DEF_STMT (def) = stmt;
4667 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
4672 /* Rewrites the final value of USE (that is only needed outside of the loop)
4673 using candidate CAND. */
4675 static void
4676 rewrite_use_outer (struct ivopts_data *data,
4677 struct iv_use *use, struct iv_cand *cand)
4679 edge exit;
4680 tree value, op, stmts, tgt;
4681 tree phi;
4683 switch (TREE_CODE (use->stmt))
4685 case PHI_NODE:
4686 tgt = PHI_RESULT (use->stmt);
4687 break;
4688 case MODIFY_EXPR:
4689 tgt = TREE_OPERAND (use->stmt, 0);
4690 break;
4691 default:
4692 gcc_unreachable ();
4695 exit = single_dom_exit (data->current_loop);
4697 if (exit)
4699 if (!cand->iv)
4701 bool ok = may_replace_final_value (data->current_loop, use, &value);
4702 gcc_assert (ok);
4704 else
4705 value = get_computation_at (data->current_loop,
4706 use, cand, last_stmt (exit->src));
4708 value = unshare_expr (value);
4709 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
4711 /* If we will preserve the iv anyway and we would need to perform
4712 some computation to replace the final value, do nothing. */
4713 if (stmts && name_info (data, tgt)->preserve_biv)
4714 return;
4716 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
4718 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
4720 if (USE_FROM_PTR (use_p) == tgt)
4721 SET_USE (use_p, op);
4724 if (stmts)
4725 compute_phi_arg_on_exit (exit, stmts, op);
4727 /* Enable removal of the statement. We cannot remove it directly,
4728 since we may still need the aliasing information attached to the
4729 ssa name defined by it. */
4730 name_info (data, tgt)->iv->have_use_for = false;
4731 return;
4734 /* If the variable is going to be preserved anyway, there is nothing to
4735 do. */
4736 if (name_info (data, tgt)->preserve_biv)
4737 return;
4739 /* Otherwise we just need to compute the iv. */
4740 rewrite_use_nonlinear_expr (data, use, cand);
4743 /* Rewrites USE using candidate CAND. */
4745 static void
4746 rewrite_use (struct ivopts_data *data,
4747 struct iv_use *use, struct iv_cand *cand)
4749 switch (use->type)
4751 case USE_NONLINEAR_EXPR:
4752 rewrite_use_nonlinear_expr (data, use, cand);
4753 break;
4755 case USE_OUTER:
4756 rewrite_use_outer (data, use, cand);
4757 break;
4759 case USE_ADDRESS:
4760 rewrite_use_address (data, use, cand);
4761 break;
4763 case USE_COMPARE:
4764 rewrite_use_compare (data, use, cand);
4765 break;
4767 default:
4768 gcc_unreachable ();
4770 modify_stmt (use->stmt);
4773 /* Rewrite the uses using the selected induction variables. */
4775 static void
4776 rewrite_uses (struct ivopts_data *data)
4778 unsigned i;
4779 struct iv_cand *cand;
4780 struct iv_use *use;
4782 for (i = 0; i < n_iv_uses (data); i++)
4784 use = iv_use (data, i);
4785 cand = use->selected;
4786 gcc_assert (cand);
4788 rewrite_use (data, use, cand);
4792 /* Removes the ivs that are not used after rewriting. */
4794 static void
4795 remove_unused_ivs (struct ivopts_data *data)
4797 unsigned j;
4798 bitmap_iterator bi;
4800 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4802 struct version_info *info;
4804 info = ver_info (data, j);
4805 if (info->iv
4806 && !zero_p (info->iv->step)
4807 && !info->inv_id
4808 && !info->iv->have_use_for
4809 && !info->preserve_biv)
4810 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
4814 /* Frees data allocated by the optimization of a single loop. */
4816 static void
4817 free_loop_data (struct ivopts_data *data)
4819 unsigned i, j;
4820 bitmap_iterator bi;
4822 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
4824 struct version_info *info;
4826 info = ver_info (data, i);
4827 if (info->iv)
4828 free (info->iv);
4829 info->iv = NULL;
4830 info->has_nonlin_use = false;
4831 info->preserve_biv = false;
4832 info->inv_id = 0;
4834 bitmap_clear (data->relevant);
4835 bitmap_clear (data->important_candidates);
4837 for (i = 0; i < n_iv_uses (data); i++)
4839 struct iv_use *use = iv_use (data, i);
4841 free (use->iv);
4842 BITMAP_XFREE (use->related_cands);
4843 for (j = 0; j < use->n_map_members; j++)
4844 if (use->cost_map[j].depends_on)
4845 BITMAP_XFREE (use->cost_map[j].depends_on);
4846 free (use->cost_map);
4847 free (use);
4849 VARRAY_POP_ALL (data->iv_uses);
4851 for (i = 0; i < n_iv_cands (data); i++)
4853 struct iv_cand *cand = iv_cand (data, i);
4855 if (cand->iv)
4856 free (cand->iv);
4857 free (cand);
4859 VARRAY_POP_ALL (data->iv_candidates);
4861 if (data->version_info_size < num_ssa_names)
4863 data->version_info_size = 2 * num_ssa_names;
4864 free (data->version_info);
4865 data->version_info = xcalloc (data->version_info_size,
4866 sizeof (struct version_info));
4869 data->max_inv_id = 0;
4871 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
4873 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
4875 SET_DECL_RTL (obj, NULL_RTX);
4877 VARRAY_POP_ALL (decl_rtl_to_reset);
4880 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
4881 loop tree. */
4883 static void
4884 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
4886 unsigned i;
4888 for (i = 1; i < loops->num; i++)
4889 if (loops->parray[i])
4891 free (loops->parray[i]->aux);
4892 loops->parray[i]->aux = NULL;
4895 free_loop_data (data);
4896 free (data->version_info);
4897 BITMAP_XFREE (data->relevant);
4898 BITMAP_XFREE (data->important_candidates);
4900 VARRAY_FREE (decl_rtl_to_reset);
4901 VARRAY_FREE (data->iv_uses);
4902 VARRAY_FREE (data->iv_candidates);
4905 /* Optimizes the LOOP. Returns true if anything changed. */
4907 static bool
4908 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
4910 bool changed = false;
4911 struct iv_ca *iv_ca;
4912 edge exit;
4914 data->current_loop = loop;
4916 if (dump_file && (dump_flags & TDF_DETAILS))
4918 fprintf (dump_file, "Processing loop %d\n", loop->num);
4920 exit = single_dom_exit (loop);
4921 if (exit)
4923 fprintf (dump_file, " single exit %d -> %d, exit condition ",
4924 exit->src->index, exit->dest->index);
4925 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
4926 fprintf (dump_file, "\n");
4929 fprintf (dump_file, "\n");
4932 /* For each ssa name determines whether it behaves as an induction variable
4933 in some loop. */
4934 if (!find_induction_variables (data))
4935 goto finish;
4937 /* Finds interesting uses (item 1). */
4938 find_interesting_uses (data);
4939 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
4940 goto finish;
4942 /* Finds candidates for the induction variables (item 2). */
4943 find_iv_candidates (data);
4945 /* Calculates the costs (item 3, part 1). */
4946 determine_use_iv_costs (data);
4947 determine_iv_costs (data);
4948 determine_set_costs (data);
4950 /* Find the optimal set of induction variables (item 3, part 2). */
4951 iv_ca = find_optimal_iv_set (data);
4952 if (!iv_ca)
4953 goto finish;
4954 changed = true;
4956 /* Create the new induction variables (item 4, part 1). */
4957 create_new_ivs (data, iv_ca);
4958 iv_ca_free (&iv_ca);
4960 /* Rewrite the uses (item 4, part 2). */
4961 rewrite_uses (data);
4963 /* Remove the ivs that are unused after rewriting. */
4964 remove_unused_ivs (data);
4966 loop_commit_inserts ();
4968 /* We have changed the structure of induction variables; it might happen
4969 that definitions in the scev database refer to some of them that were
4970 eliminated. */
4971 scev_reset ();
4973 finish:
4974 free_loop_data (data);
4976 return changed;
4979 /* Main entry point. Optimizes induction variables in LOOPS. */
4981 void
4982 tree_ssa_iv_optimize (struct loops *loops)
4984 struct loop *loop;
4985 struct ivopts_data data;
4987 tree_ssa_iv_optimize_init (loops, &data);
4989 /* Optimize the loops starting with the innermost ones. */
4990 loop = loops->tree_root;
4991 while (loop->inner)
4992 loop = loop->inner;
4994 #ifdef ENABLE_CHECKING
4995 verify_loop_closed_ssa ();
4996 verify_stmts ();
4997 #endif
4999 /* Scan the loops, inner ones first. */
5000 while (loop != loops->tree_root)
5002 if (dump_file && (dump_flags & TDF_DETAILS))
5003 flow_loop_dump (loop, dump_file, NULL, 1);
5005 tree_ssa_iv_optimize_loop (&data, loop);
5007 if (loop->next)
5009 loop = loop->next;
5010 while (loop->inner)
5011 loop = loop->inner;
5013 else
5014 loop = loop->outer;
5017 #ifdef ENABLE_CHECKING
5018 verify_loop_closed_ssa ();
5019 verify_stmts ();
5020 #endif
5022 tree_ssa_iv_optimize_finalize (loops, &data);