Add Paolo Bonzini to vector ChangeLog.
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blob833cbf87296f43c169c3f2b20aaf03c04967ec32
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
92 /* The infinite cost. */
93 #define INFTY 10000000
95 /* The expected number of loop iterations. TODO -- use profiling instead of
96 this. */
97 #define AVG_LOOP_NITER(LOOP) 5
99 /* Just to shorten the ugly names. */
100 #define EXEC_BINARY nondestructive_fold_binary_to_constant
101 #define EXEC_UNARY nondestructive_fold_unary_to_constant
103 /* Representation of the induction variable. */
104 struct iv
106 tree base; /* Initial value of the iv. */
107 tree base_object; /* A memory object to that the induction variable points. */
108 tree step; /* Step of the iv (constant only). */
109 tree ssa_name; /* The ssa name with the value. */
110 bool biv_p; /* Is it a biv? */
111 bool have_use_for; /* Do we already have a use for it? */
112 unsigned use_id; /* The identifier in the use if it is the case. */
115 /* Per-ssa version information (induction variable descriptions, etc.). */
116 struct version_info
118 tree name; /* The ssa name. */
119 struct iv *iv; /* Induction variable description. */
120 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
121 an expression that is not an induction variable. */
122 unsigned inv_id; /* Id of an invariant. */
123 bool preserve_biv; /* For the original biv, whether to preserve it. */
126 /* Information attached to loop. */
127 struct loop_data
129 struct tree_niter_desc niter;
130 /* Number of iterations. */
132 unsigned regs_used; /* Number of registers used. */
135 /* Types of uses. */
136 enum use_type
138 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
139 USE_OUTER, /* The induction variable is used outside the loop. */
140 USE_ADDRESS, /* Use in an address. */
141 USE_COMPARE /* Use is a compare. */
144 /* The candidate - cost pair. */
145 struct cost_pair
147 struct iv_cand *cand; /* The candidate. */
148 unsigned cost; /* The cost. */
149 bitmap depends_on; /* The list of invariants that have to be
150 preserved. */
153 /* Use. */
154 struct iv_use
156 unsigned id; /* The id of the use. */
157 enum use_type type; /* Type of the use. */
158 struct iv *iv; /* The induction variable it is based on. */
159 tree stmt; /* Statement in that it occurs. */
160 tree *op_p; /* The place where it occurs. */
161 bitmap related_cands; /* The set of "related" iv candidates, plus the common
162 important ones. */
164 unsigned n_map_members; /* Number of candidates in the cost_map list. */
165 struct cost_pair *cost_map;
166 /* The costs wrto the iv candidates. */
168 struct iv_cand *selected;
169 /* The selected candidate. */
172 /* The position where the iv is computed. */
173 enum iv_position
175 IP_NORMAL, /* At the end, just before the exit condition. */
176 IP_END, /* At the end of the latch block. */
177 IP_ORIGINAL /* The original biv. */
180 /* The induction variable candidate. */
181 struct iv_cand
183 unsigned id; /* The number of the candidate. */
184 bool important; /* Whether this is an "important" candidate, i.e. such
185 that it should be considered by all uses. */
186 enum iv_position pos; /* Where it is computed. */
187 tree incremented_at; /* For original biv, the statement where it is
188 incremented. */
189 tree var_before; /* The variable used for it before increment. */
190 tree var_after; /* The variable used for it after increment. */
191 struct iv *iv; /* The value of the candidate. NULL for
192 "pseudocandidate" used to indicate the possibility
193 to replace the final value of an iv by direct
194 computation of the value. */
195 unsigned cost; /* Cost of the candidate. */
198 /* The data used by the induction variable optimizations. */
200 struct ivopts_data
202 /* The currently optimized loop. */
203 struct loop *current_loop;
205 /* The size of version_info array allocated. */
206 unsigned version_info_size;
208 /* The array of information for the ssa names. */
209 struct version_info *version_info;
211 /* The bitmap of indices in version_info whose value was changed. */
212 bitmap relevant;
214 /* The maximum invariant id. */
215 unsigned max_inv_id;
217 /* The uses of induction variables. */
218 varray_type iv_uses;
220 /* The candidates. */
221 varray_type iv_candidates;
223 /* A bitmap of important candidates. */
224 bitmap important_candidates;
226 /* Whether to consider just related and important candidates when replacing a
227 use. */
228 bool consider_all_candidates;
231 /* An assignment of iv candidates to uses. */
233 struct iv_ca
235 /* The number of uses covered by the assignment. */
236 unsigned upto;
238 /* Number of uses that cannot be expressed by the candidates in the set. */
239 unsigned bad_uses;
241 /* Candidate assigned to a use, together with the related costs. */
242 struct cost_pair **cand_for_use;
244 /* Number of times each candidate is used. */
245 unsigned *n_cand_uses;
247 /* The candidates used. */
248 bitmap cands;
250 /* Total number of registers needed. */
251 unsigned n_regs;
253 /* Total cost of expressing uses. */
254 unsigned cand_use_cost;
256 /* Total cost of candidates. */
257 unsigned cand_cost;
259 /* Number of times each invariant is used. */
260 unsigned *n_invariant_uses;
262 /* Total cost of the assignment. */
263 unsigned cost;
266 /* Difference of two iv candidate assignments. */
268 struct iv_ca_delta
270 /* Changed use. */
271 struct iv_use *use;
273 /* An old assignment (for rollback purposes). */
274 struct cost_pair *old_cp;
276 /* A new assignment. */
277 struct cost_pair *new_cp;
279 /* Next change in the list. */
280 struct iv_ca_delta *next_change;
283 /* Bound on number of candidates below that all candidates are considered. */
285 #define CONSIDER_ALL_CANDIDATES_BOUND \
286 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
288 /* If there are more iv occurrences, we just give up (it is quite unlikely that
289 optimizing such a loop would help, and it would take ages). */
291 #define MAX_CONSIDERED_USES \
292 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
294 /* The list of trees for that the decl_rtl field must be reset is stored
295 here. */
297 static varray_type decl_rtl_to_reset;
299 /* Number of uses recorded in DATA. */
301 static inline unsigned
302 n_iv_uses (struct ivopts_data *data)
304 return VARRAY_ACTIVE_SIZE (data->iv_uses);
307 /* Ith use recorded in DATA. */
309 static inline struct iv_use *
310 iv_use (struct ivopts_data *data, unsigned i)
312 return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
315 /* Number of candidates recorded in DATA. */
317 static inline unsigned
318 n_iv_cands (struct ivopts_data *data)
320 return VARRAY_ACTIVE_SIZE (data->iv_candidates);
323 /* Ith candidate recorded in DATA. */
325 static inline struct iv_cand *
326 iv_cand (struct ivopts_data *data, unsigned i)
328 return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
331 /* The data for LOOP. */
333 static inline struct loop_data *
334 loop_data (struct loop *loop)
336 return loop->aux;
339 /* The single loop exit if it dominates the latch, NULL otherwise. */
341 static edge
342 single_dom_exit (struct loop *loop)
344 edge exit = loop->single_exit;
346 if (!exit)
347 return NULL;
349 if (!just_once_each_iteration_p (loop, exit->src))
350 return NULL;
352 return exit;
355 /* Dumps information about the induction variable IV to FILE. */
357 extern void dump_iv (FILE *, struct iv *);
358 void
359 dump_iv (FILE *file, struct iv *iv)
361 if (iv->ssa_name)
363 fprintf (file, "ssa name ");
364 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
365 fprintf (file, "\n");
368 fprintf (file, " type ");
369 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
370 fprintf (file, "\n");
372 if (iv->step)
374 fprintf (file, " base ");
375 print_generic_expr (file, iv->base, TDF_SLIM);
376 fprintf (file, "\n");
378 fprintf (file, " step ");
379 print_generic_expr (file, iv->step, TDF_SLIM);
380 fprintf (file, "\n");
382 else
384 fprintf (file, " invariant ");
385 print_generic_expr (file, iv->base, TDF_SLIM);
386 fprintf (file, "\n");
389 if (iv->base_object)
391 fprintf (file, " base object ");
392 print_generic_expr (file, iv->base_object, TDF_SLIM);
393 fprintf (file, "\n");
396 if (iv->biv_p)
397 fprintf (file, " is a biv\n");
400 /* Dumps information about the USE to FILE. */
402 extern void dump_use (FILE *, struct iv_use *);
403 void
404 dump_use (FILE *file, struct iv_use *use)
406 fprintf (file, "use %d\n", use->id);
408 switch (use->type)
410 case USE_NONLINEAR_EXPR:
411 fprintf (file, " generic\n");
412 break;
414 case USE_OUTER:
415 fprintf (file, " outside\n");
416 break;
418 case USE_ADDRESS:
419 fprintf (file, " address\n");
420 break;
422 case USE_COMPARE:
423 fprintf (file, " compare\n");
424 break;
426 default:
427 gcc_unreachable ();
430 fprintf (file, " in statement ");
431 print_generic_expr (file, use->stmt, TDF_SLIM);
432 fprintf (file, "\n");
434 fprintf (file, " at position ");
435 if (use->op_p)
436 print_generic_expr (file, *use->op_p, TDF_SLIM);
437 fprintf (file, "\n");
439 dump_iv (file, use->iv);
441 fprintf (file, " related candidates ");
442 dump_bitmap (file, use->related_cands);
445 /* Dumps information about the uses to FILE. */
447 extern void dump_uses (FILE *, struct ivopts_data *);
448 void
449 dump_uses (FILE *file, struct ivopts_data *data)
451 unsigned i;
452 struct iv_use *use;
454 for (i = 0; i < n_iv_uses (data); i++)
456 use = iv_use (data, i);
458 dump_use (file, use);
459 fprintf (file, "\n");
463 /* Dumps information about induction variable candidate CAND to FILE. */
465 extern void dump_cand (FILE *, struct iv_cand *);
466 void
467 dump_cand (FILE *file, struct iv_cand *cand)
469 struct iv *iv = cand->iv;
471 fprintf (file, "candidate %d%s\n",
472 cand->id, cand->important ? " (important)" : "");
474 if (!iv)
476 fprintf (file, " final value replacement\n");
477 return;
480 switch (cand->pos)
482 case IP_NORMAL:
483 fprintf (file, " incremented before exit test\n");
484 break;
486 case IP_END:
487 fprintf (file, " incremented at end\n");
488 break;
490 case IP_ORIGINAL:
491 fprintf (file, " original biv\n");
492 break;
495 dump_iv (file, iv);
498 /* Returns the info for ssa version VER. */
500 static inline struct version_info *
501 ver_info (struct ivopts_data *data, unsigned ver)
503 return data->version_info + ver;
506 /* Returns the info for ssa name NAME. */
508 static inline struct version_info *
509 name_info (struct ivopts_data *data, tree name)
511 return ver_info (data, SSA_NAME_VERSION (name));
514 /* Checks whether there exists number X such that X * B = A, counting modulo
515 2^BITS. */
517 static bool
518 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
519 HOST_WIDE_INT *x)
521 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
522 unsigned HOST_WIDE_INT inv, ex, val;
523 unsigned i;
525 a &= mask;
526 b &= mask;
528 /* First divide the whole equation by 2 as long as possible. */
529 while (!(a & 1) && !(b & 1))
531 a >>= 1;
532 b >>= 1;
533 bits--;
534 mask >>= 1;
537 if (!(b & 1))
539 /* If b is still even, a is odd and there is no such x. */
540 return false;
543 /* Find the inverse of b. We compute it as
544 b^(2^(bits - 1) - 1) (mod 2^bits). */
545 inv = 1;
546 ex = b;
547 for (i = 0; i < bits - 1; i++)
549 inv = (inv * ex) & mask;
550 ex = (ex * ex) & mask;
553 val = (a * inv) & mask;
555 gcc_assert (((val * b) & mask) == a);
557 if ((val >> (bits - 1)) & 1)
558 val |= ~mask;
560 *x = val;
562 return true;
565 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
566 emitted in LOOP. */
568 static bool
569 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
571 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
573 gcc_assert (bb);
575 if (sbb == loop->latch)
576 return true;
578 if (sbb != bb)
579 return false;
581 return stmt == last_stmt (bb);
584 /* Returns true if STMT if after the place where the original induction
585 variable CAND is incremented. */
587 static bool
588 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
590 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
591 basic_block stmt_bb = bb_for_stmt (stmt);
592 block_stmt_iterator bsi;
594 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
595 return false;
597 if (stmt_bb != cand_bb)
598 return true;
600 /* Scan the block from the end, since the original ivs are usually
601 incremented at the end of the loop body. */
602 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
604 if (bsi_stmt (bsi) == cand->incremented_at)
605 return false;
606 if (bsi_stmt (bsi) == stmt)
607 return true;
611 /* Returns true if STMT if after the place where the induction variable
612 CAND is incremented in LOOP. */
614 static bool
615 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
617 switch (cand->pos)
619 case IP_END:
620 return false;
622 case IP_NORMAL:
623 return stmt_after_ip_normal_pos (loop, stmt);
625 case IP_ORIGINAL:
626 return stmt_after_ip_original_pos (cand, stmt);
628 default:
629 gcc_unreachable ();
633 /* Initializes data structures used by the iv optimization pass, stored
634 in DATA. LOOPS is the loop tree. */
636 static void
637 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
639 unsigned i;
641 data->version_info_size = 2 * num_ssa_names;
642 data->version_info = xcalloc (data->version_info_size,
643 sizeof (struct version_info));
644 data->relevant = BITMAP_XMALLOC ();
645 data->important_candidates = BITMAP_XMALLOC ();
646 data->max_inv_id = 0;
648 for (i = 1; i < loops->num; i++)
649 if (loops->parray[i])
650 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
652 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
653 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
654 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
657 /* Returns a memory object to that EXPR points. In case we are able to
658 determine that it does not point to any such object, NULL is returned. */
660 static tree
661 determine_base_object (tree expr)
663 enum tree_code code = TREE_CODE (expr);
664 tree base, obj, op0, op1;
666 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
667 return NULL_TREE;
669 switch (code)
671 case INTEGER_CST:
672 return NULL_TREE;
674 case ADDR_EXPR:
675 obj = TREE_OPERAND (expr, 0);
676 base = get_base_address (obj);
678 if (!base)
679 return fold_convert (ptr_type_node, expr);
681 if (TREE_CODE (base) == INDIRECT_REF)
682 return fold_convert (ptr_type_node, TREE_OPERAND (base, 0));
684 return fold (build1 (ADDR_EXPR, ptr_type_node, base));
686 case PLUS_EXPR:
687 case MINUS_EXPR:
688 op0 = determine_base_object (TREE_OPERAND (expr, 0));
689 op1 = determine_base_object (TREE_OPERAND (expr, 1));
691 if (!op1)
692 return op0;
694 if (!op0)
695 return (code == PLUS_EXPR
696 ? op1
697 : fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));
699 return fold (build (code, ptr_type_node, op0, op1));
701 default:
702 return fold_convert (ptr_type_node, expr);
706 /* Allocates an induction variable with given initial value BASE and step STEP
707 for loop LOOP. */
709 static struct iv *
710 alloc_iv (tree base, tree step)
712 struct iv *iv = xcalloc (1, sizeof (struct iv));
714 if (step && integer_zerop (step))
715 step = NULL_TREE;
717 iv->base = base;
718 iv->base_object = determine_base_object (base);
719 iv->step = step;
720 iv->biv_p = false;
721 iv->have_use_for = false;
722 iv->use_id = 0;
723 iv->ssa_name = NULL_TREE;
725 return iv;
728 /* Sets STEP and BASE for induction variable IV. */
730 static void
731 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
733 struct version_info *info = name_info (data, iv);
735 gcc_assert (!info->iv);
737 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
738 info->iv = alloc_iv (base, step);
739 info->iv->ssa_name = iv;
742 /* Finds induction variable declaration for VAR. */
744 static struct iv *
745 get_iv (struct ivopts_data *data, tree var)
747 basic_block bb;
749 if (!name_info (data, var)->iv)
751 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
753 if (!bb
754 || !flow_bb_inside_loop_p (data->current_loop, bb))
755 set_iv (data, var, var, NULL_TREE);
758 return name_info (data, var)->iv;
761 /* Determines the step of a biv defined in PHI. */
763 static tree
764 determine_biv_step (tree phi)
766 struct loop *loop = bb_for_stmt (phi)->loop_father;
767 tree name = PHI_RESULT (phi), base, step;
768 tree type = TREE_TYPE (name);
770 if (!is_gimple_reg (name))
771 return NULL_TREE;
773 if (!simple_iv (loop, phi, name, &base, &step))
774 return NULL_TREE;
776 if (!step)
777 return build_int_cst (type, 0);
779 return step;
782 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
784 static bool
785 abnormal_ssa_name_p (tree exp)
787 if (!exp)
788 return false;
790 if (TREE_CODE (exp) != SSA_NAME)
791 return false;
793 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
796 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
797 abnormal phi node. Callback for for_each_index. */
799 static bool
800 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
801 void *data ATTRIBUTE_UNUSED)
803 if (TREE_CODE (base) == ARRAY_REF)
805 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
806 return false;
807 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
808 return false;
811 return !abnormal_ssa_name_p (*index);
814 /* Returns true if EXPR contains a ssa name that occurs in an
815 abnormal phi node. */
817 static bool
818 contains_abnormal_ssa_name_p (tree expr)
820 enum tree_code code = TREE_CODE (expr);
821 enum tree_code_class class = TREE_CODE_CLASS (code);
823 if (code == SSA_NAME)
824 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
826 if (code == INTEGER_CST
827 || is_gimple_min_invariant (expr))
828 return false;
830 if (code == ADDR_EXPR)
831 return !for_each_index (&TREE_OPERAND (expr, 1),
832 idx_contains_abnormal_ssa_name_p,
833 NULL);
835 switch (class)
837 case tcc_binary:
838 case tcc_comparison:
839 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
840 return true;
842 /* Fallthru. */
843 case tcc_unary:
844 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
845 return true;
847 break;
849 default:
850 gcc_unreachable ();
853 return false;
856 /* Finds basic ivs. */
858 static bool
859 find_bivs (struct ivopts_data *data)
861 tree phi, step, type, base;
862 bool found = false;
863 struct loop *loop = data->current_loop;
865 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
867 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
868 continue;
870 step = determine_biv_step (phi);
872 if (!step)
873 continue;
874 if (cst_and_fits_in_hwi (step)
875 && int_cst_value (step) == 0)
876 continue;
878 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
879 if (contains_abnormal_ssa_name_p (base))
880 continue;
882 type = TREE_TYPE (PHI_RESULT (phi));
883 base = fold_convert (type, base);
884 step = fold_convert (type, step);
886 /* FIXME: We do not handle induction variables whose step does
887 not satisfy cst_and_fits_in_hwi. */
888 if (!cst_and_fits_in_hwi (step))
889 continue;
891 set_iv (data, PHI_RESULT (phi), base, step);
892 found = true;
895 return found;
898 /* Marks basic ivs. */
900 static void
901 mark_bivs (struct ivopts_data *data)
903 tree phi, var;
904 struct iv *iv, *incr_iv;
905 struct loop *loop = data->current_loop;
906 basic_block incr_bb;
908 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
910 iv = get_iv (data, PHI_RESULT (phi));
911 if (!iv)
912 continue;
914 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
915 incr_iv = get_iv (data, var);
916 if (!incr_iv)
917 continue;
919 /* If the increment is in the subloop, ignore it. */
920 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
921 if (incr_bb->loop_father != data->current_loop
922 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
923 continue;
925 iv->biv_p = true;
926 incr_iv->biv_p = true;
930 /* Checks whether STMT defines a linear induction variable and stores its
931 parameters to BASE and STEP. */
933 static bool
934 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
935 tree *base, tree *step)
937 tree lhs;
938 struct loop *loop = data->current_loop;
940 *base = NULL_TREE;
941 *step = NULL_TREE;
943 if (TREE_CODE (stmt) != MODIFY_EXPR)
944 return false;
946 lhs = TREE_OPERAND (stmt, 0);
947 if (TREE_CODE (lhs) != SSA_NAME)
948 return false;
950 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
951 return false;
953 /* FIXME: We do not handle induction variables whose step does
954 not satisfy cst_and_fits_in_hwi. */
955 if (!zero_p (*step)
956 && !cst_and_fits_in_hwi (*step))
957 return false;
959 if (contains_abnormal_ssa_name_p (*base))
960 return false;
962 return true;
965 /* Finds general ivs in statement STMT. */
967 static void
968 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
970 tree base, step;
972 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
973 return;
975 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
978 /* Finds general ivs in basic block BB. */
980 static void
981 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
983 block_stmt_iterator bsi;
985 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
986 find_givs_in_stmt (data, bsi_stmt (bsi));
989 /* Finds general ivs. */
991 static void
992 find_givs (struct ivopts_data *data)
994 struct loop *loop = data->current_loop;
995 basic_block *body = get_loop_body_in_dom_order (loop);
996 unsigned i;
998 for (i = 0; i < loop->num_nodes; i++)
999 find_givs_in_bb (data, body[i]);
1000 free (body);
1003 /* Determine the number of iterations of the current loop. */
1005 static void
1006 determine_number_of_iterations (struct ivopts_data *data)
1008 struct loop *loop = data->current_loop;
1009 edge exit = single_dom_exit (loop);
1011 if (!exit)
1012 return;
1014 number_of_iterations_exit (loop, exit, &loop_data (loop)->niter);
1017 /* For each ssa name defined in LOOP determines whether it is an induction
1018 variable and if so, its initial value and step. */
1020 static bool
1021 find_induction_variables (struct ivopts_data *data)
1023 unsigned i;
1024 struct loop *loop = data->current_loop;
1025 bitmap_iterator bi;
1027 if (!find_bivs (data))
1028 return false;
1030 find_givs (data);
1031 mark_bivs (data);
1032 determine_number_of_iterations (data);
1034 if (dump_file && (dump_flags & TDF_DETAILS))
1036 if (loop_data (loop)->niter.niter)
1038 fprintf (dump_file, " number of iterations ");
1039 print_generic_expr (dump_file, loop_data (loop)->niter.niter,
1040 TDF_SLIM);
1041 fprintf (dump_file, "\n");
1043 fprintf (dump_file, " may be zero if ");
1044 print_generic_expr (dump_file, loop_data (loop)->niter.may_be_zero,
1045 TDF_SLIM);
1046 fprintf (dump_file, "\n");
1048 fprintf (dump_file, " bogus unless ");
1049 print_generic_expr (dump_file, loop_data (loop)->niter.assumptions,
1050 TDF_SLIM);
1051 fprintf (dump_file, "\n");
1052 fprintf (dump_file, "\n");
1055 fprintf (dump_file, "Induction variables:\n\n");
1057 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1059 if (ver_info (data, i)->iv)
1060 dump_iv (dump_file, ver_info (data, i)->iv);
1064 return true;
1067 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1069 static struct iv_use *
1070 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1071 tree stmt, enum use_type use_type)
1073 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1075 use->id = n_iv_uses (data);
1076 use->type = use_type;
1077 use->iv = iv;
1078 use->stmt = stmt;
1079 use->op_p = use_p;
1080 use->related_cands = BITMAP_XMALLOC ();
1082 /* To avoid showing ssa name in the dumps, if it was not reset by the
1083 caller. */
1084 iv->ssa_name = NULL_TREE;
1086 if (dump_file && (dump_flags & TDF_DETAILS))
1087 dump_use (dump_file, use);
1089 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
1091 return use;
1094 /* Checks whether OP is a loop-level invariant and if so, records it.
1095 NONLINEAR_USE is true if the invariant is used in a way we do not
1096 handle specially. */
1098 static void
1099 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1101 basic_block bb;
1102 struct version_info *info;
1104 if (TREE_CODE (op) != SSA_NAME
1105 || !is_gimple_reg (op))
1106 return;
1108 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1109 if (bb
1110 && flow_bb_inside_loop_p (data->current_loop, bb))
1111 return;
1113 info = name_info (data, op);
1114 info->name = op;
1115 info->has_nonlin_use |= nonlinear_use;
1116 if (!info->inv_id)
1117 info->inv_id = ++data->max_inv_id;
1118 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1121 /* Checks whether the use OP is interesting and if so, records it
1122 as TYPE. */
1124 static struct iv_use *
1125 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1126 enum use_type type)
1128 struct iv *iv;
1129 struct iv *civ;
1130 tree stmt;
1131 struct iv_use *use;
1133 if (TREE_CODE (op) != SSA_NAME)
1134 return NULL;
1136 iv = get_iv (data, op);
1137 if (!iv)
1138 return NULL;
1140 if (iv->have_use_for)
1142 use = iv_use (data, iv->use_id);
1144 gcc_assert (use->type == USE_NONLINEAR_EXPR
1145 || use->type == USE_OUTER);
1147 if (type == USE_NONLINEAR_EXPR)
1148 use->type = USE_NONLINEAR_EXPR;
1149 return use;
1152 if (zero_p (iv->step))
1154 record_invariant (data, op, true);
1155 return NULL;
1157 iv->have_use_for = true;
1159 civ = xmalloc (sizeof (struct iv));
1160 *civ = *iv;
1162 stmt = SSA_NAME_DEF_STMT (op);
1163 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1164 || TREE_CODE (stmt) == MODIFY_EXPR);
1166 use = record_use (data, NULL, civ, stmt, type);
1167 iv->use_id = use->id;
1169 return use;
1172 /* Checks whether the use OP is interesting and if so, records it. */
1174 static struct iv_use *
1175 find_interesting_uses_op (struct ivopts_data *data, tree op)
1177 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1180 /* Records a definition of induction variable OP that is used outside of the
1181 loop. */
1183 static struct iv_use *
1184 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1186 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1189 /* Checks whether the condition *COND_P in STMT is interesting
1190 and if so, records it. */
1192 static void
1193 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1195 tree *op0_p;
1196 tree *op1_p;
1197 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1198 struct iv const_iv;
1199 tree zero = integer_zero_node;
1201 const_iv.step = NULL_TREE;
1203 if (integer_zerop (*cond_p)
1204 || integer_nonzerop (*cond_p))
1205 return;
1207 if (TREE_CODE (*cond_p) == SSA_NAME)
1209 op0_p = cond_p;
1210 op1_p = &zero;
1212 else
1214 op0_p = &TREE_OPERAND (*cond_p, 0);
1215 op1_p = &TREE_OPERAND (*cond_p, 1);
1218 if (TREE_CODE (*op0_p) == SSA_NAME)
1219 iv0 = get_iv (data, *op0_p);
1220 else
1221 iv0 = &const_iv;
1223 if (TREE_CODE (*op1_p) == SSA_NAME)
1224 iv1 = get_iv (data, *op1_p);
1225 else
1226 iv1 = &const_iv;
1228 if (/* When comparing with non-invariant value, we may not do any senseful
1229 induction variable elimination. */
1230 (!iv0 || !iv1)
1231 /* Eliminating condition based on two ivs would be nontrivial.
1232 ??? TODO -- it is not really important to handle this case. */
1233 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1235 find_interesting_uses_op (data, *op0_p);
1236 find_interesting_uses_op (data, *op1_p);
1237 return;
1240 if (zero_p (iv0->step) && zero_p (iv1->step))
1242 /* If both are invariants, this is a work for unswitching. */
1243 return;
1246 civ = xmalloc (sizeof (struct iv));
1247 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1248 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1251 /* Returns true if expression EXPR is obviously invariant in LOOP,
1252 i.e. if all its operands are defined outside of the LOOP. */
1254 bool
1255 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1257 basic_block def_bb;
1258 unsigned i, len;
1260 if (is_gimple_min_invariant (expr))
1261 return true;
1263 if (TREE_CODE (expr) == SSA_NAME)
1265 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1266 if (def_bb
1267 && flow_bb_inside_loop_p (loop, def_bb))
1268 return false;
1270 return true;
1273 if (!EXPR_P (expr))
1274 return false;
1276 len = first_rtl_op (TREE_CODE (expr));
1277 for (i = 0; i < len; i++)
1278 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1279 return false;
1281 return true;
1284 /* Cumulates the steps of indices into DATA and replaces their values with the
1285 initial ones. Returns false when the value of the index cannot be determined.
1286 Callback for for_each_index. */
1288 struct ifs_ivopts_data
1290 struct ivopts_data *ivopts_data;
1291 tree stmt;
1292 tree *step_p;
1295 static bool
1296 idx_find_step (tree base, tree *idx, void *data)
1298 struct ifs_ivopts_data *dta = data;
1299 struct iv *iv;
1300 tree step, type, iv_type, iv_step, lbound, off;
1301 struct loop *loop = dta->ivopts_data->current_loop;
1303 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1304 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1305 return false;
1307 /* If base is a component ref, require that the offset of the reference
1308 be invariant. */
1309 if (TREE_CODE (base) == COMPONENT_REF)
1311 off = component_ref_field_offset (base);
1312 return expr_invariant_in_loop_p (loop, off);
1315 /* If base is array, first check whether we will be able to move the
1316 reference out of the loop (in order to take its address in strength
1317 reduction). In order for this to work we need both lower bound
1318 and step to be loop invariants. */
1319 if (TREE_CODE (base) == ARRAY_REF)
1321 step = array_ref_element_size (base);
1322 lbound = array_ref_low_bound (base);
1324 if (!expr_invariant_in_loop_p (loop, step)
1325 || !expr_invariant_in_loop_p (loop, lbound))
1326 return false;
1329 if (TREE_CODE (*idx) != SSA_NAME)
1330 return true;
1332 iv = get_iv (dta->ivopts_data, *idx);
1333 if (!iv)
1334 return false;
1336 *idx = iv->base;
1338 if (!iv->step)
1339 return true;
1341 iv_type = TREE_TYPE (iv->base);
1342 type = build_pointer_type (TREE_TYPE (base));
1343 if (TREE_CODE (base) == ARRAY_REF)
1345 step = array_ref_element_size (base);
1347 /* We only handle addresses whose step is an integer constant. */
1348 if (TREE_CODE (step) != INTEGER_CST)
1349 return false;
1351 else
1352 /* The step for pointer arithmetics already is 1 byte. */
1353 step = build_int_cst (type, 1);
1355 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1356 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1357 type, iv->base, iv->step, dta->stmt);
1358 else
1359 iv_step = fold_convert (iv_type, iv->step);
1361 if (!iv_step)
1363 /* The index might wrap. */
1364 return false;
1367 step = EXEC_BINARY (MULT_EXPR, type, step, iv_step);
1369 if (!*dta->step_p)
1370 *dta->step_p = step;
1371 else
1372 *dta->step_p = EXEC_BINARY (PLUS_EXPR, type, *dta->step_p, step);
1374 return true;
1377 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1378 object is passed to it in DATA. */
1380 static bool
1381 idx_record_use (tree base, tree *idx,
1382 void *data)
1384 find_interesting_uses_op (data, *idx);
1385 if (TREE_CODE (base) == ARRAY_REF)
1387 find_interesting_uses_op (data, array_ref_element_size (base));
1388 find_interesting_uses_op (data, array_ref_low_bound (base));
1390 return true;
1393 /* Finds addresses in *OP_P inside STMT. */
1395 static void
1396 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1398 tree base = unshare_expr (*op_p), step = NULL;
1399 struct iv *civ;
1400 struct ifs_ivopts_data ifs_ivopts_data;
1402 /* Ignore bitfields for now. Not really something terribly complicated
1403 to handle. TODO. */
1404 if (TREE_CODE (base) == COMPONENT_REF
1405 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1406 goto fail;
1408 ifs_ivopts_data.ivopts_data = data;
1409 ifs_ivopts_data.stmt = stmt;
1410 ifs_ivopts_data.step_p = &step;
1411 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1412 || zero_p (step))
1413 goto fail;
1415 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1416 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1418 if (TREE_CODE (base) == INDIRECT_REF)
1419 base = TREE_OPERAND (base, 0);
1420 else
1421 base = build_addr (base);
1423 civ = alloc_iv (base, step);
1424 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1425 return;
1427 fail:
1428 for_each_index (op_p, idx_record_use, data);
1431 /* Finds and records invariants used in STMT. */
1433 static void
1434 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1436 use_optype uses = NULL;
1437 unsigned i, n;
1438 tree op;
1440 if (TREE_CODE (stmt) == PHI_NODE)
1441 n = PHI_NUM_ARGS (stmt);
1442 else
1444 get_stmt_operands (stmt);
1445 uses = STMT_USE_OPS (stmt);
1446 n = NUM_USES (uses);
1449 for (i = 0; i < n; i++)
1451 if (TREE_CODE (stmt) == PHI_NODE)
1452 op = PHI_ARG_DEF (stmt, i);
1453 else
1454 op = USE_OP (uses, i);
1456 record_invariant (data, op, false);
1460 /* Finds interesting uses of induction variables in the statement STMT. */
1462 static void
1463 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1465 struct iv *iv;
1466 tree op, lhs, rhs;
1467 use_optype uses = NULL;
1468 unsigned i, n;
1470 find_invariants_stmt (data, stmt);
1472 if (TREE_CODE (stmt) == COND_EXPR)
1474 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1475 return;
1478 if (TREE_CODE (stmt) == MODIFY_EXPR)
1480 lhs = TREE_OPERAND (stmt, 0);
1481 rhs = TREE_OPERAND (stmt, 1);
1483 if (TREE_CODE (lhs) == SSA_NAME)
1485 /* If the statement defines an induction variable, the uses are not
1486 interesting by themselves. */
1488 iv = get_iv (data, lhs);
1490 if (iv && !zero_p (iv->step))
1491 return;
1494 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1496 case tcc_comparison:
1497 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1498 return;
1500 case tcc_reference:
1501 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1502 if (REFERENCE_CLASS_P (lhs))
1503 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1504 return;
1506 default: ;
1509 if (REFERENCE_CLASS_P (lhs)
1510 && is_gimple_val (rhs))
1512 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1513 find_interesting_uses_op (data, rhs);
1514 return;
1517 /* TODO -- we should also handle address uses of type
1519 memory = call (whatever);
1523 call (memory). */
1526 if (TREE_CODE (stmt) == PHI_NODE
1527 && bb_for_stmt (stmt) == data->current_loop->header)
1529 lhs = PHI_RESULT (stmt);
1530 iv = get_iv (data, lhs);
1532 if (iv && !zero_p (iv->step))
1533 return;
1536 if (TREE_CODE (stmt) == PHI_NODE)
1537 n = PHI_NUM_ARGS (stmt);
1538 else
1540 uses = STMT_USE_OPS (stmt);
1541 n = NUM_USES (uses);
1544 for (i = 0; i < n; i++)
1546 if (TREE_CODE (stmt) == PHI_NODE)
1547 op = PHI_ARG_DEF (stmt, i);
1548 else
1549 op = USE_OP (uses, i);
1551 if (TREE_CODE (op) != SSA_NAME)
1552 continue;
1554 iv = get_iv (data, op);
1555 if (!iv)
1556 continue;
1558 find_interesting_uses_op (data, op);
1562 /* Finds interesting uses of induction variables outside of loops
1563 on loop exit edge EXIT. */
1565 static void
1566 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1568 tree phi, def;
1570 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1572 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1573 find_interesting_uses_outer (data, def);
1577 /* Finds uses of the induction variables that are interesting. */
1579 static void
1580 find_interesting_uses (struct ivopts_data *data)
1582 basic_block bb;
1583 block_stmt_iterator bsi;
1584 tree phi;
1585 basic_block *body = get_loop_body (data->current_loop);
1586 unsigned i;
1587 struct version_info *info;
1588 edge e;
1590 if (dump_file && (dump_flags & TDF_DETAILS))
1591 fprintf (dump_file, "Uses:\n\n");
1593 for (i = 0; i < data->current_loop->num_nodes; i++)
1595 edge_iterator ei;
1596 bb = body[i];
1598 FOR_EACH_EDGE (e, ei, bb->succs)
1599 if (e->dest != EXIT_BLOCK_PTR
1600 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1601 find_interesting_uses_outside (data, e);
1603 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1604 find_interesting_uses_stmt (data, phi);
1605 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1606 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1609 if (dump_file && (dump_flags & TDF_DETAILS))
1611 bitmap_iterator bi;
1613 fprintf (dump_file, "\n");
1615 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1617 info = ver_info (data, i);
1618 if (info->inv_id)
1620 fprintf (dump_file, " ");
1621 print_generic_expr (dump_file, info->name, TDF_SLIM);
1622 fprintf (dump_file, " is invariant (%d)%s\n",
1623 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1627 fprintf (dump_file, "\n");
1630 free (body);
1633 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1634 position to POS. If USE is not NULL, the candidate is set as related to
1635 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1636 replacement of the final value of the iv by a direct computation. */
1638 static struct iv_cand *
1639 add_candidate_1 (struct ivopts_data *data,
1640 tree base, tree step, bool important, enum iv_position pos,
1641 struct iv_use *use, tree incremented_at)
1643 unsigned i;
1644 struct iv_cand *cand = NULL;
1645 tree type;
1647 if (base)
1649 type = TREE_TYPE (base);
1650 if (!TYPE_UNSIGNED (type))
1652 type = unsigned_type_for (type);
1653 base = fold_convert (type, base);
1654 if (step)
1655 step = fold_convert (type, step);
1659 for (i = 0; i < n_iv_cands (data); i++)
1661 cand = iv_cand (data, i);
1663 if (cand->pos != pos)
1664 continue;
1666 if (cand->incremented_at != incremented_at)
1667 continue;
1669 if (!cand->iv)
1671 if (!base && !step)
1672 break;
1674 continue;
1677 if (!base && !step)
1678 continue;
1680 if (!operand_equal_p (base, cand->iv->base, 0))
1681 continue;
1683 if (zero_p (cand->iv->step))
1685 if (zero_p (step))
1686 break;
1688 else
1690 if (step && operand_equal_p (step, cand->iv->step, 0))
1691 break;
1695 if (i == n_iv_cands (data))
1697 cand = xcalloc (1, sizeof (struct iv_cand));
1698 cand->id = i;
1700 if (!base && !step)
1701 cand->iv = NULL;
1702 else
1703 cand->iv = alloc_iv (base, step);
1705 cand->pos = pos;
1706 if (pos != IP_ORIGINAL && cand->iv)
1708 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1709 cand->var_after = cand->var_before;
1711 cand->important = important;
1712 cand->incremented_at = incremented_at;
1713 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1715 if (dump_file && (dump_flags & TDF_DETAILS))
1716 dump_cand (dump_file, cand);
1719 if (important && !cand->important)
1721 cand->important = true;
1722 if (dump_file && (dump_flags & TDF_DETAILS))
1723 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1726 if (use)
1728 bitmap_set_bit (use->related_cands, i);
1729 if (dump_file && (dump_flags & TDF_DETAILS))
1730 fprintf (dump_file, "Candidate %d is related to use %d\n",
1731 cand->id, use->id);
1734 return cand;
1737 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1738 position to POS. If USE is not NULL, the candidate is set as related to
1739 it. The candidate computation is scheduled on all available positions. */
1741 static void
1742 add_candidate (struct ivopts_data *data,
1743 tree base, tree step, bool important, struct iv_use *use)
1745 if (ip_normal_pos (data->current_loop))
1746 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
1747 if (ip_end_pos (data->current_loop))
1748 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
1751 /* Adds standard iv candidates. */
1753 static void
1754 add_standard_iv_candidates (struct ivopts_data *data)
1756 /* Add 0 + 1 * iteration candidate. */
1757 add_candidate (data,
1758 build_int_cst (unsigned_intSI_type_node, 0),
1759 build_int_cst (unsigned_intSI_type_node, 1),
1760 true, NULL);
1762 /* The same for a long type if it is still fast enough. */
1763 if (BITS_PER_WORD > 32)
1764 add_candidate (data,
1765 build_int_cst (unsigned_intDI_type_node, 0),
1766 build_int_cst (unsigned_intDI_type_node, 1),
1767 true, NULL);
1771 /* Adds candidates bases on the old induction variable IV. */
1773 static void
1774 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
1776 tree phi, def;
1777 struct iv_cand *cand;
1779 add_candidate (data, iv->base, iv->step, true, NULL);
1781 /* The same, but with initial value zero. */
1782 add_candidate (data,
1783 build_int_cst (TREE_TYPE (iv->base), 0),
1784 iv->step, true, NULL);
1786 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
1787 if (TREE_CODE (phi) == PHI_NODE)
1789 /* Additionally record the possibility of leaving the original iv
1790 untouched. */
1791 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
1792 cand = add_candidate_1 (data,
1793 iv->base, iv->step, true, IP_ORIGINAL, NULL,
1794 SSA_NAME_DEF_STMT (def));
1795 cand->var_before = iv->ssa_name;
1796 cand->var_after = def;
1800 /* Adds candidates based on the old induction variables. */
1802 static void
1803 add_old_ivs_candidates (struct ivopts_data *data)
1805 unsigned i;
1806 struct iv *iv;
1807 bitmap_iterator bi;
1809 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1811 iv = ver_info (data, i)->iv;
1812 if (iv && iv->biv_p && !zero_p (iv->step))
1813 add_old_iv_candidates (data, iv);
1817 /* Adds candidates based on the value of the induction variable IV and USE. */
1819 static void
1820 add_iv_value_candidates (struct ivopts_data *data,
1821 struct iv *iv, struct iv_use *use)
1823 add_candidate (data, iv->base, iv->step, false, use);
1825 /* The same, but with initial value zero. */
1826 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
1827 iv->step, false, use);
1830 /* Adds candidates based on the address IV and USE. */
1832 static void
1833 add_address_candidates (struct ivopts_data *data,
1834 struct iv *iv, struct iv_use *use)
1836 tree base, abase, tmp, *act;
1838 /* First, the trivial choices. */
1839 add_iv_value_candidates (data, iv, use);
1841 /* Second, try removing the COMPONENT_REFs. */
1842 if (TREE_CODE (iv->base) == ADDR_EXPR)
1844 base = TREE_OPERAND (iv->base, 0);
1845 while (TREE_CODE (base) == COMPONENT_REF
1846 || (TREE_CODE (base) == ARRAY_REF
1847 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
1848 base = TREE_OPERAND (base, 0);
1850 if (base != TREE_OPERAND (iv->base, 0))
1852 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1853 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1855 if (TREE_CODE (base) == INDIRECT_REF)
1856 base = TREE_OPERAND (base, 0);
1857 else
1858 base = build_addr (base);
1859 add_candidate (data, base, iv->step, false, use);
1863 /* Third, try removing the constant offset. */
1864 abase = iv->base;
1865 while (TREE_CODE (abase) == PLUS_EXPR
1866 && TREE_CODE (TREE_OPERAND (abase, 1)) != INTEGER_CST)
1867 abase = TREE_OPERAND (abase, 0);
1868 /* We found the offset, so make the copy of the non-shared part and
1869 remove it. */
1870 if (TREE_CODE (abase) == PLUS_EXPR)
1872 tmp = iv->base;
1873 act = &base;
1875 for (tmp = iv->base; tmp != abase; tmp = TREE_OPERAND (tmp, 0))
1877 *act = build2 (PLUS_EXPR, TREE_TYPE (tmp),
1878 NULL_TREE, TREE_OPERAND (tmp, 1));
1879 act = &TREE_OPERAND (*act, 0);
1881 *act = TREE_OPERAND (tmp, 0);
1883 add_candidate (data, base, iv->step, false, use);
1887 /* Possibly adds pseudocandidate for replacing the final value of USE by
1888 a direct computation. */
1890 static void
1891 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
1893 struct tree_niter_desc *niter;
1894 struct loop *loop = data->current_loop;
1896 /* We must know where we exit the loop and how many times does it roll. */
1897 if (!single_dom_exit (loop))
1898 return;
1900 niter = &loop_data (loop)->niter;
1901 if (!niter->niter
1902 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
1903 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
1904 return;
1906 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
1909 /* Adds candidates based on the uses. */
1911 static void
1912 add_derived_ivs_candidates (struct ivopts_data *data)
1914 unsigned i;
1916 for (i = 0; i < n_iv_uses (data); i++)
1918 struct iv_use *use = iv_use (data, i);
1920 if (!use)
1921 continue;
1923 switch (use->type)
1925 case USE_NONLINEAR_EXPR:
1926 case USE_COMPARE:
1927 /* Just add the ivs based on the value of the iv used here. */
1928 add_iv_value_candidates (data, use->iv, use);
1929 break;
1931 case USE_OUTER:
1932 add_iv_value_candidates (data, use->iv, use);
1934 /* Additionally, add the pseudocandidate for the possibility to
1935 replace the final value by a direct computation. */
1936 add_iv_outer_candidates (data, use);
1937 break;
1939 case USE_ADDRESS:
1940 add_address_candidates (data, use->iv, use);
1941 break;
1943 default:
1944 gcc_unreachable ();
1949 /* Record important candidates and add them to related_cands bitmaps
1950 if needed. */
1952 static void
1953 record_important_candidates (struct ivopts_data *data)
1955 unsigned i;
1956 struct iv_use *use;
1958 for (i = 0; i < n_iv_cands (data); i++)
1960 struct iv_cand *cand = iv_cand (data, i);
1962 if (cand->important)
1963 bitmap_set_bit (data->important_candidates, i);
1966 data->consider_all_candidates = (n_iv_cands (data)
1967 <= CONSIDER_ALL_CANDIDATES_BOUND);
1969 if (data->consider_all_candidates)
1971 /* We will not need "related_cands" bitmaps in this case,
1972 so release them to decrease peak memory consumption. */
1973 for (i = 0; i < n_iv_uses (data); i++)
1975 use = iv_use (data, i);
1976 BITMAP_XFREE (use->related_cands);
1979 else
1981 /* Add important candidates to the related_cands bitmaps. */
1982 for (i = 0; i < n_iv_uses (data); i++)
1983 bitmap_ior_into (iv_use (data, i)->related_cands,
1984 data->important_candidates);
1988 /* Finds the candidates for the induction variables. */
1990 static void
1991 find_iv_candidates (struct ivopts_data *data)
1993 /* Add commonly used ivs. */
1994 add_standard_iv_candidates (data);
1996 /* Add old induction variables. */
1997 add_old_ivs_candidates (data);
1999 /* Add induction variables derived from uses. */
2000 add_derived_ivs_candidates (data);
2002 /* Record the important candidates. */
2003 record_important_candidates (data);
2006 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2007 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2008 we allocate a simple list to every use. */
2010 static void
2011 alloc_use_cost_map (struct ivopts_data *data)
2013 unsigned i, size, s, j;
2015 for (i = 0; i < n_iv_uses (data); i++)
2017 struct iv_use *use = iv_use (data, i);
2018 bitmap_iterator bi;
2020 if (data->consider_all_candidates)
2021 size = n_iv_cands (data);
2022 else
2024 s = 0;
2025 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2027 s++;
2030 /* Round up to the power of two, so that moduling by it is fast. */
2031 for (size = 1; size < s; size <<= 1)
2032 continue;
2035 use->n_map_members = size;
2036 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2040 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2041 on invariants DEPENDS_ON. */
2043 static void
2044 set_use_iv_cost (struct ivopts_data *data,
2045 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2046 bitmap depends_on)
2048 unsigned i, s;
2050 if (cost == INFTY)
2052 if (depends_on)
2053 BITMAP_XFREE (depends_on);
2054 return;
2057 if (data->consider_all_candidates)
2059 use->cost_map[cand->id].cand = cand;
2060 use->cost_map[cand->id].cost = cost;
2061 use->cost_map[cand->id].depends_on = depends_on;
2062 return;
2065 /* n_map_members is a power of two, so this computes modulo. */
2066 s = cand->id & (use->n_map_members - 1);
2067 for (i = s; i < use->n_map_members; i++)
2068 if (!use->cost_map[i].cand)
2069 goto found;
2070 for (i = 0; i < s; i++)
2071 if (!use->cost_map[i].cand)
2072 goto found;
2074 gcc_unreachable ();
2076 found:
2077 use->cost_map[i].cand = cand;
2078 use->cost_map[i].cost = cost;
2079 use->cost_map[i].depends_on = depends_on;
2082 /* Gets cost of (USE, CANDIDATE) pair. */
2084 static struct cost_pair *
2085 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2086 struct iv_cand *cand)
2088 unsigned i, s;
2089 struct cost_pair *ret;
2091 if (!cand)
2092 return NULL;
2094 if (data->consider_all_candidates)
2096 ret = use->cost_map + cand->id;
2097 if (!ret->cand)
2098 return NULL;
2100 return ret;
2103 /* n_map_members is a power of two, so this computes modulo. */
2104 s = cand->id & (use->n_map_members - 1);
2105 for (i = s; i < use->n_map_members; i++)
2106 if (use->cost_map[i].cand == cand)
2107 return use->cost_map + i;
2109 for (i = 0; i < s; i++)
2110 if (use->cost_map[i].cand == cand)
2111 return use->cost_map + i;
2113 return NULL;
2116 /* Returns estimate on cost of computing SEQ. */
2118 static unsigned
2119 seq_cost (rtx seq)
2121 unsigned cost = 0;
2122 rtx set;
2124 for (; seq; seq = NEXT_INSN (seq))
2126 set = single_set (seq);
2127 if (set)
2128 cost += rtx_cost (set, SET);
2129 else
2130 cost++;
2133 return cost;
2136 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2137 static rtx
2138 produce_memory_decl_rtl (tree obj, int *regno)
2140 rtx x;
2141 if (!obj)
2142 abort ();
2143 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2145 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2146 x = gen_rtx_SYMBOL_REF (Pmode, name);
2148 else
2149 x = gen_raw_REG (Pmode, (*regno)++);
2151 return gen_rtx_MEM (DECL_MODE (obj), x);
2154 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2155 walk_tree. DATA contains the actual fake register number. */
2157 static tree
2158 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2160 tree obj = NULL_TREE;
2161 rtx x = NULL_RTX;
2162 int *regno = data;
2164 switch (TREE_CODE (*expr_p))
2166 case ADDR_EXPR:
2167 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2168 (handled_component_p (*expr_p)
2169 || TREE_CODE (*expr_p) == REALPART_EXPR
2170 || TREE_CODE (*expr_p) == IMAGPART_EXPR);
2171 expr_p = &TREE_OPERAND (*expr_p, 0));
2172 obj = *expr_p;
2173 if (DECL_P (obj))
2174 x = produce_memory_decl_rtl (obj, regno);
2175 break;
2177 case SSA_NAME:
2178 *ws = 0;
2179 obj = SSA_NAME_VAR (*expr_p);
2180 if (!DECL_RTL_SET_P (obj))
2181 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2182 break;
2184 case VAR_DECL:
2185 case PARM_DECL:
2186 case RESULT_DECL:
2187 *ws = 0;
2188 obj = *expr_p;
2190 if (DECL_RTL_SET_P (obj))
2191 break;
2193 if (DECL_MODE (obj) == BLKmode)
2194 x = produce_memory_decl_rtl (obj, regno);
2195 else
2196 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2198 break;
2200 default:
2201 break;
2204 if (x)
2206 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
2207 SET_DECL_RTL (obj, x);
2210 return NULL_TREE;
2213 /* Determines cost of the computation of EXPR. */
2215 static unsigned
2216 computation_cost (tree expr)
2218 rtx seq, rslt;
2219 tree type = TREE_TYPE (expr);
2220 unsigned cost;
2221 int regno = 0;
2223 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2224 start_sequence ();
2225 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2226 seq = get_insns ();
2227 end_sequence ();
2229 cost = seq_cost (seq);
2230 if (GET_CODE (rslt) == MEM)
2231 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2233 return cost;
2236 /* Returns variable containing the value of candidate CAND at statement AT. */
2238 static tree
2239 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2241 if (stmt_after_increment (loop, cand, stmt))
2242 return cand->var_after;
2243 else
2244 return cand->var_before;
2247 /* Determines the expression by that USE is expressed from induction variable
2248 CAND at statement AT in LOOP. */
2250 static tree
2251 get_computation_at (struct loop *loop,
2252 struct iv_use *use, struct iv_cand *cand, tree at)
2254 tree ubase = use->iv->base;
2255 tree ustep = use->iv->step;
2256 tree cbase = cand->iv->base;
2257 tree cstep = cand->iv->step;
2258 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2259 tree uutype;
2260 tree expr, delta;
2261 tree ratio;
2262 unsigned HOST_WIDE_INT ustepi, cstepi;
2263 HOST_WIDE_INT ratioi;
2265 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2267 /* We do not have a precision to express the values of use. */
2268 return NULL_TREE;
2271 expr = var_at_stmt (loop, cand, at);
2273 if (TREE_TYPE (expr) != ctype)
2275 /* This may happen with the original ivs. */
2276 expr = fold_convert (ctype, expr);
2279 if (TYPE_UNSIGNED (utype))
2280 uutype = utype;
2281 else
2283 uutype = unsigned_type_for (utype);
2284 ubase = fold_convert (uutype, ubase);
2285 ustep = fold_convert (uutype, ustep);
2288 if (uutype != ctype)
2290 expr = fold_convert (uutype, expr);
2291 cbase = fold_convert (uutype, cbase);
2292 cstep = fold_convert (uutype, cstep);
2295 if (!cst_and_fits_in_hwi (cstep)
2296 || !cst_and_fits_in_hwi (ustep))
2297 return NULL_TREE;
2299 ustepi = int_cst_value (ustep);
2300 cstepi = int_cst_value (cstep);
2302 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2304 /* TODO maybe consider case when ustep divides cstep and the ratio is
2305 a power of 2 (so that the division is fast to execute)? We would
2306 need to be much more careful with overflows etc. then. */
2307 return NULL_TREE;
2310 /* We may need to shift the value if we are after the increment. */
2311 if (stmt_after_increment (loop, cand, at))
2312 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2314 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2315 or |ratio| == 1, it is better to handle this like
2317 ubase - ratio * cbase + ratio * var. */
2319 if (ratioi == 1)
2321 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2322 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2324 else if (ratioi == -1)
2326 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2327 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2329 else if (TREE_CODE (cbase) == INTEGER_CST)
2331 ratio = build_int_cst_type (uutype, ratioi);
2332 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2333 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2334 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2335 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2337 else
2339 expr = fold (build2 (MINUS_EXPR, uutype, expr, cbase));
2340 ratio = build_int_cst_type (uutype, ratioi);
2341 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2342 expr = fold (build2 (PLUS_EXPR, uutype, ubase, expr));
2345 return fold_convert (utype, expr);
2348 /* Determines the expression by that USE is expressed from induction variable
2349 CAND in LOOP. */
2351 static tree
2352 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2354 return get_computation_at (loop, use, cand, use->stmt);
2357 /* Strips constant offsets from EXPR and adds them to OFFSET. */
2359 static void
2360 strip_offset (tree *expr, unsigned HOST_WIDE_INT *offset)
2362 tree op0, op1;
2363 enum tree_code code;
2365 while (1)
2367 if (cst_and_fits_in_hwi (*expr))
2369 *offset += int_cst_value (*expr);
2370 *expr = integer_zero_node;
2371 return;
2374 code = TREE_CODE (*expr);
2376 if (code != PLUS_EXPR && code != MINUS_EXPR)
2377 return;
2379 op0 = TREE_OPERAND (*expr, 0);
2380 op1 = TREE_OPERAND (*expr, 1);
2382 if (cst_and_fits_in_hwi (op1))
2384 if (code == PLUS_EXPR)
2385 *offset += int_cst_value (op1);
2386 else
2387 *offset -= int_cst_value (op1);
2389 *expr = op0;
2390 continue;
2393 if (code != PLUS_EXPR)
2394 return;
2396 if (!cst_and_fits_in_hwi (op0))
2397 return;
2399 *offset += int_cst_value (op0);
2400 *expr = op1;
2404 /* Returns cost of addition in MODE. */
2406 static unsigned
2407 add_cost (enum machine_mode mode)
2409 static unsigned costs[NUM_MACHINE_MODES];
2410 rtx seq;
2411 unsigned cost;
2413 if (costs[mode])
2414 return costs[mode];
2416 start_sequence ();
2417 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2418 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2419 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2420 NULL_RTX);
2421 seq = get_insns ();
2422 end_sequence ();
2424 cost = seq_cost (seq);
2425 if (!cost)
2426 cost = 1;
2428 costs[mode] = cost;
2430 if (dump_file && (dump_flags & TDF_DETAILS))
2431 fprintf (dump_file, "Addition in %s costs %d\n",
2432 GET_MODE_NAME (mode), cost);
2433 return cost;
2436 /* Entry in a hashtable of already known costs for multiplication. */
2437 struct mbc_entry
2439 HOST_WIDE_INT cst; /* The constant to multiply by. */
2440 enum machine_mode mode; /* In mode. */
2441 unsigned cost; /* The cost. */
2444 /* Counts hash value for the ENTRY. */
2446 static hashval_t
2447 mbc_entry_hash (const void *entry)
2449 const struct mbc_entry *e = entry;
2451 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2454 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2456 static int
2457 mbc_entry_eq (const void *entry1, const void *entry2)
2459 const struct mbc_entry *e1 = entry1;
2460 const struct mbc_entry *e2 = entry2;
2462 return (e1->mode == e2->mode
2463 && e1->cst == e2->cst);
2466 /* Returns cost of multiplication by constant CST in MODE. */
2468 static unsigned
2469 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2471 static htab_t costs;
2472 struct mbc_entry **cached, act;
2473 rtx seq;
2474 unsigned cost;
2476 if (!costs)
2477 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2479 act.mode = mode;
2480 act.cst = cst;
2481 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2482 if (*cached)
2483 return (*cached)->cost;
2485 *cached = xmalloc (sizeof (struct mbc_entry));
2486 (*cached)->mode = mode;
2487 (*cached)->cst = cst;
2489 start_sequence ();
2490 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2491 NULL_RTX, 0);
2492 seq = get_insns ();
2493 end_sequence ();
2495 cost = seq_cost (seq);
2497 if (dump_file && (dump_flags & TDF_DETAILS))
2498 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2499 (int) cst, GET_MODE_NAME (mode), cost);
2501 (*cached)->cost = cost;
2503 return cost;
2506 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2507 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2508 variable is omitted. The created memory accesses MODE.
2510 TODO -- there must be some better way. This all is quite crude. */
2512 static unsigned
2513 get_address_cost (bool symbol_present, bool var_present,
2514 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2516 #define MAX_RATIO 128
2517 static sbitmap valid_mult;
2518 static HOST_WIDE_INT rat, off;
2519 static HOST_WIDE_INT min_offset, max_offset;
2520 static unsigned costs[2][2][2][2];
2521 unsigned cost, acost;
2522 rtx seq, addr, base;
2523 bool offset_p, ratio_p;
2524 rtx reg1;
2525 HOST_WIDE_INT s_offset;
2526 unsigned HOST_WIDE_INT mask;
2527 unsigned bits;
2529 if (!valid_mult)
2531 HOST_WIDE_INT i;
2533 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2535 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2536 for (i = 1; i <= 1 << 20; i <<= 1)
2538 XEXP (addr, 1) = GEN_INT (i);
2539 if (!memory_address_p (Pmode, addr))
2540 break;
2542 max_offset = i >> 1;
2543 off = max_offset;
2545 for (i = 1; i <= 1 << 20; i <<= 1)
2547 XEXP (addr, 1) = GEN_INT (-i);
2548 if (!memory_address_p (Pmode, addr))
2549 break;
2551 min_offset = -(i >> 1);
2553 if (dump_file && (dump_flags & TDF_DETAILS))
2555 fprintf (dump_file, "get_address_cost:\n");
2556 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2557 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2560 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2561 sbitmap_zero (valid_mult);
2562 rat = 1;
2563 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2564 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2566 XEXP (addr, 1) = GEN_INT (i);
2567 if (memory_address_p (Pmode, addr))
2569 SET_BIT (valid_mult, i + MAX_RATIO);
2570 rat = i;
2574 if (dump_file && (dump_flags & TDF_DETAILS))
2576 fprintf (dump_file, " allowed multipliers:");
2577 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2578 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2579 fprintf (dump_file, " %d", (int) i);
2580 fprintf (dump_file, "\n");
2581 fprintf (dump_file, "\n");
2585 bits = GET_MODE_BITSIZE (Pmode);
2586 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2587 offset &= mask;
2588 if ((offset >> (bits - 1) & 1))
2589 offset |= ~mask;
2590 s_offset = offset;
2592 cost = 0;
2593 offset_p = (s_offset != 0
2594 && min_offset <= s_offset && s_offset <= max_offset);
2595 ratio_p = (ratio != 1
2596 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2597 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2599 if (ratio != 1 && !ratio_p)
2600 cost += multiply_by_cost (ratio, Pmode);
2602 if (s_offset && !offset_p && !symbol_present)
2604 cost += add_cost (Pmode);
2605 var_present = true;
2608 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2609 if (!acost)
2611 acost = 0;
2613 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2614 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2615 if (ratio_p)
2616 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2618 if (var_present)
2619 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2621 if (symbol_present)
2623 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2624 if (offset_p)
2625 base = gen_rtx_fmt_e (CONST, Pmode,
2626 gen_rtx_fmt_ee (PLUS, Pmode,
2627 base,
2628 GEN_INT (off)));
2630 else if (offset_p)
2631 base = GEN_INT (off);
2632 else
2633 base = NULL_RTX;
2635 if (base)
2636 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2638 start_sequence ();
2639 addr = memory_address (Pmode, addr);
2640 seq = get_insns ();
2641 end_sequence ();
2643 acost = seq_cost (seq);
2644 acost += address_cost (addr, Pmode);
2646 if (!acost)
2647 acost = 1;
2648 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2651 return cost + acost;
2654 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2655 the bitmap to that we should store it. */
2657 static struct ivopts_data *fd_ivopts_data;
2658 static tree
2659 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2661 bitmap *depends_on = data;
2662 struct version_info *info;
2664 if (TREE_CODE (*expr_p) != SSA_NAME)
2665 return NULL_TREE;
2666 info = name_info (fd_ivopts_data, *expr_p);
2668 if (!info->inv_id || info->has_nonlin_use)
2669 return NULL_TREE;
2671 if (!*depends_on)
2672 *depends_on = BITMAP_XMALLOC ();
2673 bitmap_set_bit (*depends_on, info->inv_id);
2675 return NULL_TREE;
2678 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
2679 invariants the computation depends on. */
2681 static unsigned
2682 force_var_cost (struct ivopts_data *data,
2683 tree expr, bitmap *depends_on)
2685 static bool costs_initialized = false;
2686 static unsigned integer_cost;
2687 static unsigned symbol_cost;
2688 static unsigned address_cost;
2689 tree op0, op1;
2690 unsigned cost0, cost1, cost;
2691 enum machine_mode mode;
2693 if (!costs_initialized)
2695 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2696 rtx x = gen_rtx_MEM (DECL_MODE (var),
2697 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2698 tree addr;
2699 tree type = build_pointer_type (integer_type_node);
2701 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2702 2000));
2704 SET_DECL_RTL (var, x);
2705 TREE_STATIC (var) = 1;
2706 addr = build1 (ADDR_EXPR, type, var);
2707 symbol_cost = computation_cost (addr) + 1;
2709 address_cost
2710 = computation_cost (build2 (PLUS_EXPR, type,
2711 addr,
2712 build_int_cst_type (type, 2000))) + 1;
2713 if (dump_file && (dump_flags & TDF_DETAILS))
2715 fprintf (dump_file, "force_var_cost:\n");
2716 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2717 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2718 fprintf (dump_file, " address %d\n", (int) address_cost);
2719 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2720 fprintf (dump_file, "\n");
2723 costs_initialized = true;
2726 if (depends_on)
2728 fd_ivopts_data = data;
2729 walk_tree (&expr, find_depends, depends_on, NULL);
2732 if (SSA_VAR_P (expr))
2733 return 0;
2735 if (TREE_INVARIANT (expr))
2737 if (TREE_CODE (expr) == INTEGER_CST)
2738 return integer_cost;
2740 if (TREE_CODE (expr) == ADDR_EXPR)
2742 tree obj = TREE_OPERAND (expr, 0);
2744 if (TREE_CODE (obj) == VAR_DECL
2745 || TREE_CODE (obj) == PARM_DECL
2746 || TREE_CODE (obj) == RESULT_DECL)
2747 return symbol_cost;
2750 return address_cost;
2753 switch (TREE_CODE (expr))
2755 case PLUS_EXPR:
2756 case MINUS_EXPR:
2757 case MULT_EXPR:
2758 op0 = TREE_OPERAND (expr, 0);
2759 op1 = TREE_OPERAND (expr, 1);
2761 if (is_gimple_val (op0))
2762 cost0 = 0;
2763 else
2764 cost0 = force_var_cost (data, op0, NULL);
2766 if (is_gimple_val (op1))
2767 cost1 = 0;
2768 else
2769 cost1 = force_var_cost (data, op1, NULL);
2771 break;
2773 default:
2774 /* Just an arbitrary value, FIXME. */
2775 return target_spill_cost;
2778 mode = TYPE_MODE (TREE_TYPE (expr));
2779 switch (TREE_CODE (expr))
2781 case PLUS_EXPR:
2782 case MINUS_EXPR:
2783 cost = add_cost (mode);
2784 break;
2786 case MULT_EXPR:
2787 if (cst_and_fits_in_hwi (op0))
2788 cost = multiply_by_cost (int_cst_value (op0), mode);
2789 else if (cst_and_fits_in_hwi (op1))
2790 cost = multiply_by_cost (int_cst_value (op1), mode);
2791 else
2792 return target_spill_cost;
2793 break;
2795 default:
2796 gcc_unreachable ();
2799 cost += cost0;
2800 cost += cost1;
2802 /* Bound the cost by target_spill_cost. The parts of complicated
2803 computations often are either loop invariant or at least can
2804 be shared between several iv uses, so letting this grow without
2805 limits would not give reasonable results. */
2806 return cost < target_spill_cost ? cost : target_spill_cost;
2809 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2810 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2811 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2812 invariants the computation depends on. */
2814 static unsigned
2815 split_address_cost (struct ivopts_data *data,
2816 tree addr, bool *symbol_present, bool *var_present,
2817 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2819 tree core;
2820 HOST_WIDE_INT bitsize;
2821 HOST_WIDE_INT bitpos;
2822 tree toffset;
2823 enum machine_mode mode;
2824 int unsignedp, volatilep;
2826 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
2827 &unsignedp, &volatilep);
2829 if (toffset != 0
2830 || bitpos % BITS_PER_UNIT != 0
2831 || TREE_CODE (core) != VAR_DECL)
2833 *symbol_present = false;
2834 *var_present = true;
2835 fd_ivopts_data = data;
2836 walk_tree (&addr, find_depends, depends_on, NULL);
2837 return target_spill_cost;
2840 *offset += bitpos / BITS_PER_UNIT;
2841 if (TREE_STATIC (core)
2842 || DECL_EXTERNAL (core))
2844 *symbol_present = true;
2845 *var_present = false;
2846 return 0;
2849 *symbol_present = false;
2850 *var_present = true;
2851 return 0;
2854 /* Estimates cost of expressing difference of addresses E1 - E2 as
2855 var + symbol + offset. The value of offset is added to OFFSET,
2856 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2857 part is missing. DEPENDS_ON is a set of the invariants the computation
2858 depends on. */
2860 static unsigned
2861 ptr_difference_cost (struct ivopts_data *data,
2862 tree e1, tree e2, bool *symbol_present, bool *var_present,
2863 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2865 HOST_WIDE_INT diff = 0;
2866 unsigned cost;
2868 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
2870 if (ptr_difference_const (e1, e2, &diff))
2872 *offset += diff;
2873 *symbol_present = false;
2874 *var_present = false;
2875 return 0;
2878 if (e2 == integer_zero_node)
2879 return split_address_cost (data, TREE_OPERAND (e1, 0),
2880 symbol_present, var_present, offset, depends_on);
2882 *symbol_present = false;
2883 *var_present = true;
2885 cost = force_var_cost (data, e1, depends_on);
2886 cost += force_var_cost (data, e2, depends_on);
2887 cost += add_cost (Pmode);
2889 return cost;
2892 /* Estimates cost of expressing difference E1 - E2 as
2893 var + symbol + offset. The value of offset is added to OFFSET,
2894 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2895 part is missing. DEPENDS_ON is a set of the invariants the computation
2896 depends on. */
2898 static unsigned
2899 difference_cost (struct ivopts_data *data,
2900 tree e1, tree e2, bool *symbol_present, bool *var_present,
2901 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2903 unsigned cost;
2904 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
2906 strip_offset (&e1, offset);
2907 *offset = -*offset;
2908 strip_offset (&e2, offset);
2909 *offset = -*offset;
2911 if (TREE_CODE (e1) == ADDR_EXPR)
2912 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
2913 depends_on);
2914 *symbol_present = false;
2916 if (operand_equal_p (e1, e2, 0))
2918 *var_present = false;
2919 return 0;
2921 *var_present = true;
2922 if (zero_p (e2))
2923 return force_var_cost (data, e1, depends_on);
2925 if (zero_p (e1))
2927 cost = force_var_cost (data, e2, depends_on);
2928 cost += multiply_by_cost (-1, mode);
2930 return cost;
2933 cost = force_var_cost (data, e1, depends_on);
2934 cost += force_var_cost (data, e2, depends_on);
2935 cost += add_cost (mode);
2937 return cost;
2940 /* Determines the cost of the computation by that USE is expressed
2941 from induction variable CAND. If ADDRESS_P is true, we just need
2942 to create an address from it, otherwise we want to get it into
2943 register. A set of invariants we depend on is stored in
2944 DEPENDS_ON. AT is the statement at that the value is computed. */
2946 static unsigned
2947 get_computation_cost_at (struct ivopts_data *data,
2948 struct iv_use *use, struct iv_cand *cand,
2949 bool address_p, bitmap *depends_on, tree at)
2951 tree ubase = use->iv->base, ustep = use->iv->step;
2952 tree cbase, cstep;
2953 tree utype = TREE_TYPE (ubase), ctype;
2954 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
2955 HOST_WIDE_INT ratio, aratio;
2956 bool var_present, symbol_present;
2957 unsigned cost = 0, n_sums;
2959 *depends_on = NULL;
2961 /* Only consider real candidates. */
2962 if (!cand->iv)
2963 return INFTY;
2965 cbase = cand->iv->base;
2966 cstep = cand->iv->step;
2967 ctype = TREE_TYPE (cbase);
2969 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2971 /* We do not have a precision to express the values of use. */
2972 return INFTY;
2975 if (address_p)
2977 /* Do not try to express address of an object with computation based
2978 on address of a different object. This may cause problems in rtl
2979 level alias analysis (that does not expect this to be happening,
2980 as this is illegal in C), and would be unlikely to be useful
2981 anyway. */
2982 if (use->iv->base_object
2983 && cand->iv->base_object
2984 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
2985 return INFTY;
2988 if (!cst_and_fits_in_hwi (ustep)
2989 || !cst_and_fits_in_hwi (cstep))
2990 return INFTY;
2992 if (TREE_CODE (ubase) == INTEGER_CST
2993 && !cst_and_fits_in_hwi (ubase))
2994 goto fallback;
2996 if (TREE_CODE (cbase) == INTEGER_CST
2997 && !cst_and_fits_in_hwi (cbase))
2998 goto fallback;
3000 ustepi = int_cst_value (ustep);
3001 cstepi = int_cst_value (cstep);
3003 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3005 /* TODO -- add direct handling of this case. */
3006 goto fallback;
3009 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3010 return INFTY;
3012 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3013 or ratio == 1, it is better to handle this like
3015 ubase - ratio * cbase + ratio * var
3017 (also holds in the case ratio == -1, TODO. */
3019 if (TREE_CODE (cbase) == INTEGER_CST)
3021 offset = - ratio * int_cst_value (cbase);
3022 cost += difference_cost (data,
3023 ubase, integer_zero_node,
3024 &symbol_present, &var_present, &offset,
3025 depends_on);
3027 else if (ratio == 1)
3029 cost += difference_cost (data,
3030 ubase, cbase,
3031 &symbol_present, &var_present, &offset,
3032 depends_on);
3034 else
3036 cost += force_var_cost (data, cbase, depends_on);
3037 cost += add_cost (TYPE_MODE (ctype));
3038 cost += difference_cost (data,
3039 ubase, integer_zero_node,
3040 &symbol_present, &var_present, &offset,
3041 depends_on);
3044 /* If we are after the increment, the value of the candidate is higher by
3045 one iteration. */
3046 if (stmt_after_increment (data->current_loop, cand, at))
3047 offset -= ratio * cstepi;
3049 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3050 (symbol/var/const parts may be omitted). If we are looking for an address,
3051 find the cost of addressing this. */
3052 if (address_p)
3053 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3055 /* Otherwise estimate the costs for computing the expression. */
3056 aratio = ratio > 0 ? ratio : -ratio;
3057 if (!symbol_present && !var_present && !offset)
3059 if (ratio != 1)
3060 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3062 return cost;
3065 if (aratio != 1)
3066 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3068 n_sums = 1;
3069 if (var_present
3070 /* Symbol + offset should be compile-time computable. */
3071 && (symbol_present || offset))
3072 n_sums++;
3074 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3076 fallback:
3078 /* Just get the expression, expand it and measure the cost. */
3079 tree comp = get_computation_at (data->current_loop, use, cand, at);
3081 if (!comp)
3082 return INFTY;
3084 if (address_p)
3085 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3087 return computation_cost (comp);
3091 /* Determines the cost of the computation by that USE is expressed
3092 from induction variable CAND. If ADDRESS_P is true, we just need
3093 to create an address from it, otherwise we want to get it into
3094 register. A set of invariants we depend on is stored in
3095 DEPENDS_ON. */
3097 static unsigned
3098 get_computation_cost (struct ivopts_data *data,
3099 struct iv_use *use, struct iv_cand *cand,
3100 bool address_p, bitmap *depends_on)
3102 return get_computation_cost_at (data,
3103 use, cand, address_p, depends_on, use->stmt);
3106 /* Determines cost of basing replacement of USE on CAND in a generic
3107 expression. */
3109 static bool
3110 determine_use_iv_cost_generic (struct ivopts_data *data,
3111 struct iv_use *use, struct iv_cand *cand)
3113 bitmap depends_on;
3114 unsigned cost = get_computation_cost (data, use, cand, false, &depends_on);
3116 set_use_iv_cost (data, use, cand, cost, depends_on);
3118 return cost != INFTY;
3121 /* Determines cost of basing replacement of USE on CAND in an address. */
3123 static bool
3124 determine_use_iv_cost_address (struct ivopts_data *data,
3125 struct iv_use *use, struct iv_cand *cand)
3127 bitmap depends_on;
3128 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3130 set_use_iv_cost (data, use, cand, cost, depends_on);
3132 return cost != INFTY;
3135 /* Computes value of induction variable IV in iteration NITER. */
3137 static tree
3138 iv_value (struct iv *iv, tree niter)
3140 tree val;
3141 tree type = TREE_TYPE (iv->base);
3143 niter = fold_convert (type, niter);
3144 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3146 return fold (build2 (PLUS_EXPR, type, iv->base, val));
3149 /* Computes value of candidate CAND at position AT in iteration NITER. */
3151 static tree
3152 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3154 tree val = iv_value (cand->iv, niter);
3155 tree type = TREE_TYPE (cand->iv->base);
3157 if (stmt_after_increment (loop, cand, at))
3158 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3160 return val;
3163 /* Check whether it is possible to express the condition in USE by comparison
3164 of candidate CAND. If so, store the comparison code to COMPARE and the
3165 value compared with to BOUND. */
3167 static bool
3168 may_eliminate_iv (struct loop *loop,
3169 struct iv_use *use, struct iv_cand *cand,
3170 enum tree_code *compare, tree *bound)
3172 basic_block ex_bb;
3173 edge exit;
3174 struct tree_niter_desc niter, new_niter;
3175 tree wider_type, type, base;
3177 /* For now works only for exits that dominate the loop latch. TODO -- extend
3178 for other conditions inside loop body. */
3179 ex_bb = bb_for_stmt (use->stmt);
3180 if (use->stmt != last_stmt (ex_bb)
3181 || TREE_CODE (use->stmt) != COND_EXPR)
3182 return false;
3183 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3184 return false;
3186 exit = EDGE_SUCC (ex_bb, 0);
3187 if (flow_bb_inside_loop_p (loop, exit->dest))
3188 exit = EDGE_SUCC (ex_bb, 1);
3189 if (flow_bb_inside_loop_p (loop, exit->dest))
3190 return false;
3192 niter.niter = NULL_TREE;
3193 number_of_iterations_exit (loop, exit, &niter);
3194 if (!niter.niter
3195 || !integer_nonzerop (niter.assumptions)
3196 || !integer_zerop (niter.may_be_zero))
3197 return false;
3199 if (exit->flags & EDGE_TRUE_VALUE)
3200 *compare = EQ_EXPR;
3201 else
3202 *compare = NE_EXPR;
3204 *bound = cand_value_at (loop, cand, use->stmt, niter.niter);
3206 /* Let us check there is not some problem with overflows, by checking that
3207 the number of iterations is unchanged. */
3208 base = cand->iv->base;
3209 type = TREE_TYPE (base);
3210 if (stmt_after_increment (loop, cand, use->stmt))
3211 base = fold (build2 (PLUS_EXPR, type, base, cand->iv->step));
3213 new_niter.niter = NULL_TREE;
3214 number_of_iterations_cond (TREE_TYPE (cand->iv->base), base,
3215 cand->iv->step, NE_EXPR, *bound, NULL_TREE,
3216 &new_niter);
3217 if (!new_niter.niter
3218 || !integer_nonzerop (new_niter.assumptions)
3219 || !integer_zerop (new_niter.may_be_zero))
3220 return false;
3222 wider_type = TREE_TYPE (new_niter.niter);
3223 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (TREE_TYPE (niter.niter)))
3224 wider_type = TREE_TYPE (niter.niter);
3225 if (!operand_equal_p (fold_convert (wider_type, niter.niter),
3226 fold_convert (wider_type, new_niter.niter), 0))
3227 return false;
3229 return true;
3232 /* Determines cost of basing replacement of USE on CAND in a condition. */
3234 static bool
3235 determine_use_iv_cost_condition (struct ivopts_data *data,
3236 struct iv_use *use, struct iv_cand *cand)
3238 tree bound;
3239 enum tree_code compare;
3241 /* Only consider real candidates. */
3242 if (!cand->iv)
3244 set_use_iv_cost (data, use, cand, INFTY, NULL);
3245 return false;
3248 if (may_eliminate_iv (data->current_loop, use, cand, &compare, &bound))
3250 bitmap depends_on = NULL;
3251 unsigned cost = force_var_cost (data, bound, &depends_on);
3253 set_use_iv_cost (data, use, cand, cost, depends_on);
3254 return cost != INFTY;
3257 /* The induction variable elimination failed; just express the original
3258 giv. If it is compared with an invariant, note that we cannot get
3259 rid of it. */
3260 if (TREE_CODE (*use->op_p) == SSA_NAME)
3261 record_invariant (data, *use->op_p, true);
3262 else
3264 record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
3265 record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
3268 return determine_use_iv_cost_generic (data, use, cand);
3271 /* Checks whether it is possible to replace the final value of USE by
3272 a direct computation. If so, the formula is stored to *VALUE. */
3274 static bool
3275 may_replace_final_value (struct loop *loop, struct iv_use *use, tree *value)
3277 edge exit;
3278 struct tree_niter_desc *niter;
3280 exit = single_dom_exit (loop);
3281 if (!exit)
3282 return false;
3284 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3285 bb_for_stmt (use->stmt)));
3287 niter = &loop_data (loop)->niter;
3288 if (!niter->niter
3289 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
3290 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
3291 return false;
3293 *value = iv_value (use->iv, niter->niter);
3295 return true;
3298 /* Determines cost of replacing final value of USE using CAND. */
3300 static bool
3301 determine_use_iv_cost_outer (struct ivopts_data *data,
3302 struct iv_use *use, struct iv_cand *cand)
3304 bitmap depends_on;
3305 unsigned cost;
3306 edge exit;
3307 tree value;
3308 struct loop *loop = data->current_loop;
3310 if (!cand->iv)
3312 if (!may_replace_final_value (loop, use, &value))
3314 set_use_iv_cost (data, use, cand, INFTY, NULL);
3315 return false;
3318 depends_on = NULL;
3319 cost = force_var_cost (data, value, &depends_on);
3321 cost /= AVG_LOOP_NITER (loop);
3323 set_use_iv_cost (data, use, cand, cost, depends_on);
3324 return cost != INFTY;
3327 exit = single_dom_exit (loop);
3328 if (exit)
3330 /* If there is just a single exit, we may use value of the candidate
3331 after we take it to determine the value of use. */
3332 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3333 last_stmt (exit->src));
3334 if (cost != INFTY)
3335 cost /= AVG_LOOP_NITER (loop);
3337 else
3339 /* Otherwise we just need to compute the iv. */
3340 cost = get_computation_cost (data, use, cand, false, &depends_on);
3343 set_use_iv_cost (data, use, cand, cost, depends_on);
3345 return cost != INFTY;
3348 /* Determines cost of basing replacement of USE on CAND. Returns false
3349 if USE cannot be based on CAND. */
3351 static bool
3352 determine_use_iv_cost (struct ivopts_data *data,
3353 struct iv_use *use, struct iv_cand *cand)
3355 switch (use->type)
3357 case USE_NONLINEAR_EXPR:
3358 return determine_use_iv_cost_generic (data, use, cand);
3360 case USE_OUTER:
3361 return determine_use_iv_cost_outer (data, use, cand);
3363 case USE_ADDRESS:
3364 return determine_use_iv_cost_address (data, use, cand);
3366 case USE_COMPARE:
3367 return determine_use_iv_cost_condition (data, use, cand);
3369 default:
3370 gcc_unreachable ();
3374 /* Determines costs of basing the use of the iv on an iv candidate. */
3376 static void
3377 determine_use_iv_costs (struct ivopts_data *data)
3379 unsigned i, j;
3380 struct iv_use *use;
3381 struct iv_cand *cand;
3382 bitmap to_clear = BITMAP_XMALLOC ();
3384 alloc_use_cost_map (data);
3386 for (i = 0; i < n_iv_uses (data); i++)
3388 use = iv_use (data, i);
3390 if (data->consider_all_candidates)
3392 for (j = 0; j < n_iv_cands (data); j++)
3394 cand = iv_cand (data, j);
3395 determine_use_iv_cost (data, use, cand);
3398 else
3400 bitmap_iterator bi;
3402 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3404 cand = iv_cand (data, j);
3405 if (!determine_use_iv_cost (data, use, cand))
3406 bitmap_set_bit (to_clear, j);
3409 /* Remove the candidates for that the cost is infinite from
3410 the list of related candidates. */
3411 bitmap_and_compl_into (use->related_cands, to_clear);
3412 bitmap_clear (to_clear);
3416 BITMAP_XFREE (to_clear);
3418 if (dump_file && (dump_flags & TDF_DETAILS))
3420 fprintf (dump_file, "Use-candidate costs:\n");
3422 for (i = 0; i < n_iv_uses (data); i++)
3424 use = iv_use (data, i);
3426 fprintf (dump_file, "Use %d:\n", i);
3427 fprintf (dump_file, " cand\tcost\tdepends on\n");
3428 for (j = 0; j < use->n_map_members; j++)
3430 if (!use->cost_map[j].cand
3431 || use->cost_map[j].cost == INFTY)
3432 continue;
3434 fprintf (dump_file, " %d\t%d\t",
3435 use->cost_map[j].cand->id,
3436 use->cost_map[j].cost);
3437 if (use->cost_map[j].depends_on)
3438 bitmap_print (dump_file,
3439 use->cost_map[j].depends_on, "","");
3440 fprintf (dump_file, "\n");
3443 fprintf (dump_file, "\n");
3445 fprintf (dump_file, "\n");
3449 /* Determines cost of the candidate CAND. */
3451 static void
3452 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3454 unsigned cost_base, cost_step;
3455 tree base, last;
3456 basic_block bb;
3458 if (!cand->iv)
3460 cand->cost = 0;
3461 return;
3464 /* There are two costs associated with the candidate -- its increment
3465 and its initialization. The second is almost negligible for any loop
3466 that rolls enough, so we take it just very little into account. */
3468 base = cand->iv->base;
3469 cost_base = force_var_cost (data, base, NULL);
3470 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3472 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3474 /* Prefer the original iv unless we may gain something by replacing it. */
3475 if (cand->pos == IP_ORIGINAL)
3476 cand->cost--;
3478 /* Prefer not to insert statements into latch unless there are some
3479 already (so that we do not create unnecessary jumps). */
3480 if (cand->pos == IP_END)
3482 bb = ip_end_pos (data->current_loop);
3483 last = last_stmt (bb);
3485 if (!last
3486 || TREE_CODE (last) == LABEL_EXPR)
3487 cand->cost++;
3491 /* Determines costs of computation of the candidates. */
3493 static void
3494 determine_iv_costs (struct ivopts_data *data)
3496 unsigned i;
3498 if (dump_file && (dump_flags & TDF_DETAILS))
3500 fprintf (dump_file, "Candidate costs:\n");
3501 fprintf (dump_file, " cand\tcost\n");
3504 for (i = 0; i < n_iv_cands (data); i++)
3506 struct iv_cand *cand = iv_cand (data, i);
3508 determine_iv_cost (data, cand);
3510 if (dump_file && (dump_flags & TDF_DETAILS))
3511 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3514 if (dump_file && (dump_flags & TDF_DETAILS))
3515 fprintf (dump_file, "\n");
3518 /* Calculates cost for having SIZE induction variables. */
3520 static unsigned
3521 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3523 return global_cost_for_size (size,
3524 loop_data (data->current_loop)->regs_used,
3525 n_iv_uses (data));
3528 /* For each size of the induction variable set determine the penalty. */
3530 static void
3531 determine_set_costs (struct ivopts_data *data)
3533 unsigned j, n;
3534 tree phi, op;
3535 struct loop *loop = data->current_loop;
3536 bitmap_iterator bi;
3538 /* We use the following model (definitely improvable, especially the
3539 cost function -- TODO):
3541 We estimate the number of registers available (using MD data), name it A.
3543 We estimate the number of registers used by the loop, name it U. This
3544 number is obtained as the number of loop phi nodes (not counting virtual
3545 registers and bivs) + the number of variables from outside of the loop.
3547 We set a reserve R (free regs that are used for temporary computations,
3548 etc.). For now the reserve is a constant 3.
3550 Let I be the number of induction variables.
3552 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3553 make a lot of ivs without a reason).
3554 -- if A - R < U + I <= A, the cost is I * PRES_COST
3555 -- if U + I > A, the cost is I * PRES_COST and
3556 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3558 if (dump_file && (dump_flags & TDF_DETAILS))
3560 fprintf (dump_file, "Global costs:\n");
3561 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3562 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3563 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3564 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3567 n = 0;
3568 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3570 op = PHI_RESULT (phi);
3572 if (!is_gimple_reg (op))
3573 continue;
3575 if (get_iv (data, op))
3576 continue;
3578 n++;
3581 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3583 struct version_info *info = ver_info (data, j);
3585 if (info->inv_id && info->has_nonlin_use)
3586 n++;
3589 loop_data (loop)->regs_used = n;
3590 if (dump_file && (dump_flags & TDF_DETAILS))
3591 fprintf (dump_file, " regs_used %d\n", n);
3593 if (dump_file && (dump_flags & TDF_DETAILS))
3595 fprintf (dump_file, " cost for size:\n");
3596 fprintf (dump_file, " ivs\tcost\n");
3597 for (j = 0; j <= 2 * target_avail_regs; j++)
3598 fprintf (dump_file, " %d\t%d\n", j,
3599 ivopts_global_cost_for_size (data, j));
3600 fprintf (dump_file, "\n");
3604 /* Returns true if A is a cheaper cost pair than B. */
3606 static bool
3607 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3609 if (!a)
3610 return false;
3612 if (!b)
3613 return true;
3615 if (a->cost < b->cost)
3616 return true;
3618 if (a->cost > b->cost)
3619 return false;
3621 /* In case the costs are the same, prefer the cheaper candidate. */
3622 if (a->cand->cost < b->cand->cost)
3623 return true;
3625 return false;
3628 /* Computes the cost field of IVS structure. */
3630 static void
3631 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
3633 unsigned cost = 0;
3635 cost += ivs->cand_use_cost;
3636 cost += ivs->cand_cost;
3637 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
3639 ivs->cost = cost;
3642 /* Set USE not to be expressed by any candidate in IVS. */
3644 static void
3645 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
3646 struct iv_use *use)
3648 unsigned uid = use->id, cid, iid;
3649 bitmap deps;
3650 struct cost_pair *cp;
3651 bitmap_iterator bi;
3653 cp = ivs->cand_for_use[uid];
3654 if (!cp)
3655 return;
3656 cid = cp->cand->id;
3658 ivs->bad_uses++;
3659 ivs->cand_for_use[uid] = NULL;
3660 ivs->n_cand_uses[cid]--;
3662 if (ivs->n_cand_uses[cid] == 0)
3664 bitmap_clear_bit (ivs->cands, cid);
3665 /* Do not count the pseudocandidates. */
3666 if (cp->cand->iv)
3667 ivs->n_regs--;
3668 ivs->cand_cost -= cp->cand->cost;
3671 ivs->cand_use_cost -= cp->cost;
3673 deps = cp->depends_on;
3675 if (deps)
3677 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3679 ivs->n_invariant_uses[iid]--;
3680 if (ivs->n_invariant_uses[iid] == 0)
3681 ivs->n_regs--;
3685 iv_ca_recount_cost (data, ivs);
3688 /* Set cost pair for USE in set IVS to CP. */
3690 static void
3691 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
3692 struct iv_use *use, struct cost_pair *cp)
3694 unsigned uid = use->id, cid, iid;
3695 bitmap deps;
3696 bitmap_iterator bi;
3698 if (ivs->cand_for_use[uid] == cp)
3699 return;
3701 if (ivs->cand_for_use[uid])
3702 iv_ca_set_no_cp (data, ivs, use);
3704 if (cp)
3706 cid = cp->cand->id;
3708 ivs->bad_uses--;
3709 ivs->cand_for_use[uid] = cp;
3710 ivs->n_cand_uses[cid]++;
3711 if (ivs->n_cand_uses[cid] == 1)
3713 bitmap_set_bit (ivs->cands, cid);
3714 /* Do not count the pseudocandidates. */
3715 if (cp->cand->iv)
3716 ivs->n_regs++;
3717 ivs->cand_cost += cp->cand->cost;
3720 ivs->cand_use_cost += cp->cost;
3722 deps = cp->depends_on;
3724 if (deps)
3726 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3728 ivs->n_invariant_uses[iid]++;
3729 if (ivs->n_invariant_uses[iid] == 1)
3730 ivs->n_regs++;
3734 iv_ca_recount_cost (data, ivs);
3738 /* Extend set IVS by expressing USE by some of the candidates in it
3739 if possible. */
3741 static void
3742 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
3743 struct iv_use *use)
3745 struct cost_pair *best_cp = NULL, *cp;
3746 bitmap_iterator bi;
3747 unsigned i;
3749 gcc_assert (ivs->upto >= use->id);
3751 if (ivs->upto == use->id)
3753 ivs->upto++;
3754 ivs->bad_uses++;
3757 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
3759 cp = get_use_iv_cost (data, use, iv_cand (data, i));
3761 if (cheaper_cost_pair (cp, best_cp))
3762 best_cp = cp;
3765 iv_ca_set_cp (data, ivs, use, best_cp);
3768 /* Get cost for assignment IVS. */
3770 static unsigned
3771 iv_ca_cost (struct iv_ca *ivs)
3773 return (ivs->bad_uses ? INFTY : ivs->cost);
3776 /* Returns true if all dependences of CP are among invariants in IVS. */
3778 static bool
3779 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
3781 unsigned i;
3782 bitmap_iterator bi;
3784 if (!cp->depends_on)
3785 return true;
3787 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
3789 if (ivs->n_invariant_uses[i] == 0)
3790 return false;
3793 return true;
3796 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
3797 it before NEXT_CHANGE. */
3799 static struct iv_ca_delta *
3800 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
3801 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
3803 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
3805 change->use = use;
3806 change->old_cp = old_cp;
3807 change->new_cp = new_cp;
3808 change->next_change = next_change;
3810 return change;
3813 /* Returns candidate by that USE is expressed in IVS. */
3815 static struct cost_pair *
3816 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
3818 return ivs->cand_for_use[use->id];
3821 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
3822 reverted instead. */
3824 static void
3825 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
3826 struct iv_ca_delta *delta, bool forward)
3828 struct cost_pair *from, *to;
3830 for (; delta; delta = delta->next_change)
3832 if (forward)
3834 from = delta->old_cp;
3835 to = delta->new_cp;
3837 else
3839 from = delta->new_cp;
3840 to = delta->old_cp;
3843 gcc_assert (iv_ca_cand_for_use (ivs, delta->use) == from);
3844 iv_ca_set_cp (data, ivs, delta->use, to);
3848 /* Returns true if CAND is used in IVS. */
3850 static bool
3851 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
3853 return ivs->n_cand_uses[cand->id] > 0;
3856 /* Free the list of changes DELTA. */
3858 static void
3859 iv_ca_delta_free (struct iv_ca_delta **delta)
3861 struct iv_ca_delta *act, *next;
3863 for (act = *delta; act; act = next)
3865 next = act->next_change;
3866 free (act);
3869 *delta = NULL;
3872 /* Allocates new iv candidates assignment. */
3874 static struct iv_ca *
3875 iv_ca_new (struct ivopts_data *data)
3877 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
3879 nw->upto = 0;
3880 nw->bad_uses = 0;
3881 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
3882 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
3883 nw->cands = BITMAP_XMALLOC ();
3884 nw->n_regs = 0;
3885 nw->cand_use_cost = 0;
3886 nw->cand_cost = 0;
3887 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
3888 nw->cost = 0;
3890 return nw;
3893 /* Free memory occupied by the set IVS. */
3895 static void
3896 iv_ca_free (struct iv_ca **ivs)
3898 free ((*ivs)->cand_for_use);
3899 free ((*ivs)->n_cand_uses);
3900 BITMAP_XFREE ((*ivs)->cands);
3901 free ((*ivs)->n_invariant_uses);
3902 free (*ivs);
3903 *ivs = NULL;
3906 /* Dumps IVS to FILE. */
3908 static void
3909 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
3911 const char *pref = " invariants ";
3912 unsigned i;
3914 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
3915 bitmap_print (file, ivs->cands, " candidates ","\n");
3917 for (i = 1; i <= data->max_inv_id; i++)
3918 if (ivs->n_invariant_uses[i])
3920 fprintf (file, "%s%d", pref, i);
3921 pref = ", ";
3923 fprintf (file, "\n");
3926 /* Try changing candidate in IVS to CAND for each use. Return cost of the
3927 new set, and store differences in DELTA. */
3929 static unsigned
3930 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
3931 struct iv_cand *cand, struct iv_ca_delta **delta)
3933 unsigned i, cost;
3934 struct iv_use *use;
3935 struct cost_pair *old_cp, *new_cp;
3937 *delta = NULL;
3938 for (i = 0; i < ivs->upto; i++)
3940 use = iv_use (data, i);
3941 old_cp = iv_ca_cand_for_use (ivs, use);
3943 if (old_cp
3944 && old_cp->cand == cand)
3945 continue;
3947 new_cp = get_use_iv_cost (data, use, cand);
3948 if (!new_cp)
3949 continue;
3951 if (!iv_ca_has_deps (ivs, new_cp))
3952 continue;
3954 if (!cheaper_cost_pair (new_cp, old_cp))
3955 continue;
3957 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
3960 iv_ca_delta_commit (data, ivs, *delta, true);
3961 cost = iv_ca_cost (ivs);
3962 iv_ca_delta_commit (data, ivs, *delta, false);
3964 return cost;
3967 /* Try narrowing set IVS by removing CAND. Return the cost of
3968 the new set and store the differences in DELTA. */
3970 static unsigned
3971 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
3972 struct iv_cand *cand, struct iv_ca_delta **delta)
3974 unsigned i, ci;
3975 struct iv_use *use;
3976 struct cost_pair *old_cp, *new_cp, *cp;
3977 bitmap_iterator bi;
3978 struct iv_cand *cnd;
3979 unsigned cost;
3981 *delta = NULL;
3982 for (i = 0; i < n_iv_uses (data); i++)
3984 use = iv_use (data, i);
3986 old_cp = iv_ca_cand_for_use (ivs, use);
3987 if (old_cp->cand != cand)
3988 continue;
3990 new_cp = NULL;
3992 if (data->consider_all_candidates)
3994 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
3996 if (ci == cand->id)
3997 continue;
3999 cnd = iv_cand (data, ci);
4001 cp = get_use_iv_cost (data, use, cnd);
4002 if (!cp)
4003 continue;
4004 if (!iv_ca_has_deps (ivs, cp))
4005 continue;
4007 if (!cheaper_cost_pair (cp, new_cp))
4008 continue;
4010 new_cp = cp;
4013 else
4015 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4017 if (ci == cand->id)
4018 continue;
4020 cnd = iv_cand (data, ci);
4022 cp = get_use_iv_cost (data, use, cnd);
4023 if (!cp)
4024 continue;
4025 if (!iv_ca_has_deps (ivs, cp))
4026 continue;
4028 if (!cheaper_cost_pair (cp, new_cp))
4029 continue;
4031 new_cp = cp;
4035 if (!new_cp)
4037 iv_ca_delta_free (delta);
4038 return INFTY;
4041 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4044 iv_ca_delta_commit (data, ivs, *delta, true);
4045 cost = iv_ca_cost (ivs);
4046 iv_ca_delta_commit (data, ivs, *delta, false);
4048 return cost;
4051 /* Tries to extend the sets IVS in the best possible way in order
4052 to express the USE. */
4054 static bool
4055 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4056 struct iv_use *use)
4058 unsigned best_cost, act_cost;
4059 unsigned i;
4060 bitmap_iterator bi;
4061 struct iv_cand *cand;
4062 struct iv_ca_delta *best_delta = NULL, *act_delta;
4063 struct cost_pair *cp;
4065 iv_ca_add_use (data, ivs, use);
4066 best_cost = iv_ca_cost (ivs);
4068 cp = iv_ca_cand_for_use (ivs, use);
4069 if (cp)
4071 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4072 iv_ca_set_no_cp (data, ivs, use);
4075 /* First try important candidates. Only if it fails, try the specific ones.
4076 Rationale -- in loops with many variables the best choice often is to use
4077 just one generic biv. If we added here many ivs specific to the uses,
4078 the optimization algorithm later would be likely to get stuck in a local
4079 minimum, thus causing us to create too many ivs. The approach from
4080 few ivs to more seems more likely to be successful -- starting from few
4081 ivs, replacing an expensive use by a specific iv should always be a
4082 win. */
4083 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4085 cand = iv_cand (data, i);
4087 if (iv_ca_cand_used_p (ivs, cand))
4088 continue;
4090 cp = get_use_iv_cost (data, use, cand);
4091 if (!cp)
4092 continue;
4094 iv_ca_set_cp (data, ivs, use, cp);
4095 act_cost = iv_ca_extend (data, ivs, cand, &act_delta);
4096 iv_ca_set_no_cp (data, ivs, use);
4097 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4099 if (act_cost < best_cost)
4101 best_cost = act_cost;
4103 iv_ca_delta_free (&best_delta);
4104 best_delta = act_delta;
4106 else
4107 iv_ca_delta_free (&act_delta);
4110 if (best_cost == INFTY)
4112 for (i = 0; i < use->n_map_members; i++)
4114 cp = use->cost_map + i;
4115 cand = cp->cand;
4116 if (!cand)
4117 continue;
4119 /* Already tried this. */
4120 if (cand->important)
4121 continue;
4123 if (iv_ca_cand_used_p (ivs, cand))
4124 continue;
4126 act_delta = NULL;
4127 iv_ca_set_cp (data, ivs, use, cp);
4128 act_cost = iv_ca_extend (data, ivs, cand, &act_delta);
4129 iv_ca_set_no_cp (data, ivs, use);
4130 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4131 cp, act_delta);
4133 if (act_cost < best_cost)
4135 best_cost = act_cost;
4137 if (best_delta)
4138 iv_ca_delta_free (&best_delta);
4139 best_delta = act_delta;
4141 else
4142 iv_ca_delta_free (&act_delta);
4146 iv_ca_delta_commit (data, ivs, best_delta, true);
4147 iv_ca_delta_free (&best_delta);
4149 return (best_cost != INFTY);
4152 /* Finds an initial assignment of candidates to uses. */
4154 static struct iv_ca *
4155 get_initial_solution (struct ivopts_data *data)
4157 struct iv_ca *ivs = iv_ca_new (data);
4158 unsigned i;
4160 for (i = 0; i < n_iv_uses (data); i++)
4161 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4163 iv_ca_free (&ivs);
4164 return NULL;
4167 return ivs;
4170 /* Tries to improve set of induction variables IVS. */
4172 static bool
4173 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4175 unsigned i, acost, best_cost = iv_ca_cost (ivs);
4176 struct iv_ca_delta *best_delta = NULL, *act_delta;
4177 struct iv_cand *cand;
4179 /* Try altering the set of induction variables by one. */
4180 for (i = 0; i < n_iv_cands (data); i++)
4182 cand = iv_cand (data, i);
4184 if (iv_ca_cand_used_p (ivs, cand))
4185 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4186 else
4187 acost = iv_ca_extend (data, ivs, cand, &act_delta);
4189 if (acost < best_cost)
4191 best_cost = acost;
4192 if (best_delta)
4193 iv_ca_delta_free (&best_delta);
4194 best_delta = act_delta;
4196 else
4197 iv_ca_delta_free (&act_delta);
4200 if (!best_delta)
4201 return false;
4203 iv_ca_delta_commit (data, ivs, best_delta, true);
4204 iv_ca_delta_free (&best_delta);
4205 return true;
4208 /* Attempts to find the optimal set of induction variables. We do simple
4209 greedy heuristic -- we try to replace at most one candidate in the selected
4210 solution and remove the unused ivs while this improves the cost. */
4212 static struct iv_ca *
4213 find_optimal_iv_set (struct ivopts_data *data)
4215 unsigned i;
4216 struct iv_ca *set;
4217 struct iv_use *use;
4219 /* Get the initial solution. */
4220 set = get_initial_solution (data);
4221 if (!set)
4223 if (dump_file && (dump_flags & TDF_DETAILS))
4224 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4225 return NULL;
4228 if (dump_file && (dump_flags & TDF_DETAILS))
4230 fprintf (dump_file, "Initial set of candidates:\n");
4231 iv_ca_dump (data, dump_file, set);
4234 while (try_improve_iv_set (data, set))
4236 if (dump_file && (dump_flags & TDF_DETAILS))
4238 fprintf (dump_file, "Improved to:\n");
4239 iv_ca_dump (data, dump_file, set);
4243 if (dump_file && (dump_flags & TDF_DETAILS))
4244 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
4246 for (i = 0; i < n_iv_uses (data); i++)
4248 use = iv_use (data, i);
4249 use->selected = iv_ca_cand_for_use (set, use)->cand;
4252 return set;
4255 /* Creates a new induction variable corresponding to CAND. */
4257 static void
4258 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4260 block_stmt_iterator incr_pos;
4261 tree base;
4262 bool after = false;
4264 if (!cand->iv)
4265 return;
4267 switch (cand->pos)
4269 case IP_NORMAL:
4270 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4271 break;
4273 case IP_END:
4274 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4275 after = true;
4276 break;
4278 case IP_ORIGINAL:
4279 /* Mark that the iv is preserved. */
4280 name_info (data, cand->var_before)->preserve_biv = true;
4281 name_info (data, cand->var_after)->preserve_biv = true;
4283 /* Rewrite the increment so that it uses var_before directly. */
4284 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4286 return;
4289 gimple_add_tmp_var (cand->var_before);
4290 add_referenced_tmp_var (cand->var_before);
4292 base = unshare_expr (cand->iv->base);
4294 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
4295 &incr_pos, after, &cand->var_before, &cand->var_after);
4298 /* Creates new induction variables described in SET. */
4300 static void
4301 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4303 unsigned i;
4304 struct iv_cand *cand;
4305 bitmap_iterator bi;
4307 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4309 cand = iv_cand (data, i);
4310 create_new_iv (data, cand);
4314 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4315 is true, remove also the ssa name defined by the statement. */
4317 static void
4318 remove_statement (tree stmt, bool including_defined_name)
4320 if (TREE_CODE (stmt) == PHI_NODE)
4322 if (!including_defined_name)
4324 /* Prevent the ssa name defined by the statement from being removed. */
4325 SET_PHI_RESULT (stmt, NULL);
4327 remove_phi_node (stmt, NULL_TREE, bb_for_stmt (stmt));
4329 else
4331 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4333 bsi_remove (&bsi);
4337 /* Rewrites USE (definition of iv used in a nonlinear expression)
4338 using candidate CAND. */
4340 static void
4341 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4342 struct iv_use *use, struct iv_cand *cand)
4344 tree comp = unshare_expr (get_computation (data->current_loop,
4345 use, cand));
4346 tree op, stmts, tgt, ass;
4347 block_stmt_iterator bsi, pbsi;
4349 switch (TREE_CODE (use->stmt))
4351 case PHI_NODE:
4352 tgt = PHI_RESULT (use->stmt);
4354 /* If we should keep the biv, do not replace it. */
4355 if (name_info (data, tgt)->preserve_biv)
4356 return;
4358 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4359 while (!bsi_end_p (pbsi)
4360 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4362 bsi = pbsi;
4363 bsi_next (&pbsi);
4365 break;
4367 case MODIFY_EXPR:
4368 tgt = TREE_OPERAND (use->stmt, 0);
4369 bsi = bsi_for_stmt (use->stmt);
4370 break;
4372 default:
4373 gcc_unreachable ();
4376 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4378 if (TREE_CODE (use->stmt) == PHI_NODE)
4380 if (stmts)
4381 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4382 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
4383 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4384 remove_statement (use->stmt, false);
4385 SSA_NAME_DEF_STMT (tgt) = ass;
4387 else
4389 if (stmts)
4390 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4391 TREE_OPERAND (use->stmt, 1) = op;
4395 /* Replaces ssa name in index IDX by its basic variable. Callback for
4396 for_each_index. */
4398 static bool
4399 idx_remove_ssa_names (tree base, tree *idx,
4400 void *data ATTRIBUTE_UNUSED)
4402 tree *op;
4404 if (TREE_CODE (*idx) == SSA_NAME)
4405 *idx = SSA_NAME_VAR (*idx);
4407 if (TREE_CODE (base) == ARRAY_REF)
4409 op = &TREE_OPERAND (base, 2);
4410 if (*op
4411 && TREE_CODE (*op) == SSA_NAME)
4412 *op = SSA_NAME_VAR (*op);
4413 op = &TREE_OPERAND (base, 3);
4414 if (*op
4415 && TREE_CODE (*op) == SSA_NAME)
4416 *op = SSA_NAME_VAR (*op);
4419 return true;
4422 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4424 static tree
4425 unshare_and_remove_ssa_names (tree ref)
4427 ref = unshare_expr (ref);
4428 for_each_index (&ref, idx_remove_ssa_names, NULL);
4430 return ref;
4433 /* Rewrites base of memory access OP with expression WITH in statement
4434 pointed to by BSI. */
4436 static void
4437 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
4439 tree bvar, var, new_var, new_name, copy, name;
4440 tree orig;
4442 var = bvar = get_base_address (*op);
4444 if (!var || TREE_CODE (with) != SSA_NAME)
4445 goto do_rewrite;
4447 gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
4448 gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
4449 if (TREE_CODE (var) == INDIRECT_REF)
4450 var = TREE_OPERAND (var, 0);
4451 if (TREE_CODE (var) == SSA_NAME)
4453 name = var;
4454 var = SSA_NAME_VAR (var);
4456 else if (DECL_P (var))
4457 name = NULL_TREE;
4458 else
4459 goto do_rewrite;
4461 if (var_ann (var)->type_mem_tag)
4462 var = var_ann (var)->type_mem_tag;
4464 /* We need to add a memory tag for the variable. But we do not want
4465 to add it to the temporary used for the computations, since this leads
4466 to problems in redundancy elimination when there are common parts
4467 in two computations referring to the different arrays. So we copy
4468 the variable to a new temporary. */
4469 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
4470 if (name)
4471 new_name = duplicate_ssa_name (name, copy);
4472 else
4474 new_var = create_tmp_var (TREE_TYPE (with), "ruatmp");
4475 add_referenced_tmp_var (new_var);
4476 var_ann (new_var)->type_mem_tag = var;
4477 new_name = make_ssa_name (new_var, copy);
4479 TREE_OPERAND (copy, 0) = new_name;
4480 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
4481 with = new_name;
4483 do_rewrite:
4485 orig = NULL_TREE;
4486 gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
4487 gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
4489 if (TREE_CODE (*op) == INDIRECT_REF)
4490 orig = REF_ORIGINAL (*op);
4491 if (!orig)
4492 orig = unshare_and_remove_ssa_names (*op);
4494 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
4496 /* Record the original reference, for purposes of alias analysis. */
4497 REF_ORIGINAL (*op) = orig;
4500 /* Rewrites USE (address that is an iv) using candidate CAND. */
4502 static void
4503 rewrite_use_address (struct ivopts_data *data,
4504 struct iv_use *use, struct iv_cand *cand)
4506 tree comp = unshare_expr (get_computation (data->current_loop,
4507 use, cand));
4508 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4509 tree stmts;
4510 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
4512 if (stmts)
4513 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4515 rewrite_address_base (&bsi, use->op_p, op);
4518 /* Rewrites USE (the condition such that one of the arguments is an iv) using
4519 candidate CAND. */
4521 static void
4522 rewrite_use_compare (struct ivopts_data *data,
4523 struct iv_use *use, struct iv_cand *cand)
4525 tree comp;
4526 tree *op_p, cond, op, stmts, bound;
4527 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4528 enum tree_code compare;
4530 if (may_eliminate_iv (data->current_loop,
4531 use, cand, &compare, &bound))
4533 op = force_gimple_operand (unshare_expr (bound), &stmts,
4534 true, NULL_TREE);
4536 if (stmts)
4537 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4539 *use->op_p = build2 (compare, boolean_type_node,
4540 var_at_stmt (data->current_loop,
4541 cand, use->stmt), op);
4542 modify_stmt (use->stmt);
4543 return;
4546 /* The induction variable elimination failed; just express the original
4547 giv. */
4548 comp = unshare_expr (get_computation (data->current_loop, use, cand));
4550 cond = *use->op_p;
4551 op_p = &TREE_OPERAND (cond, 0);
4552 if (TREE_CODE (*op_p) != SSA_NAME
4553 || zero_p (get_iv (data, *op_p)->step))
4554 op_p = &TREE_OPERAND (cond, 1);
4556 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
4557 if (stmts)
4558 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4560 *op_p = op;
4563 /* Ensure that operand *OP_P may be used at the end of EXIT without
4564 violating loop closed ssa form. */
4566 static void
4567 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
4569 basic_block def_bb;
4570 struct loop *def_loop;
4571 tree phi, use;
4573 use = USE_FROM_PTR (op_p);
4574 if (TREE_CODE (use) != SSA_NAME)
4575 return;
4577 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
4578 if (!def_bb)
4579 return;
4581 def_loop = def_bb->loop_father;
4582 if (flow_bb_inside_loop_p (def_loop, exit->dest))
4583 return;
4585 /* Try finding a phi node that copies the value out of the loop. */
4586 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
4587 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
4588 break;
4590 if (!phi)
4592 /* Create such a phi node. */
4593 tree new_name = duplicate_ssa_name (use, NULL);
4595 phi = create_phi_node (new_name, exit->dest);
4596 SSA_NAME_DEF_STMT (new_name) = phi;
4597 add_phi_arg (&phi, use, exit);
4600 SET_USE (op_p, PHI_RESULT (phi));
4603 /* Ensure that operands of STMT may be used at the end of EXIT without
4604 violating loop closed ssa form. */
4606 static void
4607 protect_loop_closed_ssa_form (edge exit, tree stmt)
4609 use_optype uses;
4610 vuse_optype vuses;
4611 v_may_def_optype v_may_defs;
4612 unsigned i;
4614 get_stmt_operands (stmt);
4616 uses = STMT_USE_OPS (stmt);
4617 for (i = 0; i < NUM_USES (uses); i++)
4618 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4620 vuses = STMT_VUSE_OPS (stmt);
4621 for (i = 0; i < NUM_VUSES (vuses); i++)
4622 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
4624 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4625 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
4626 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
4629 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
4630 so that they are emitted on the correct place, and so that the loop closed
4631 ssa form is preserved. */
4633 static void
4634 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
4636 tree_stmt_iterator tsi;
4637 block_stmt_iterator bsi;
4638 tree phi, stmt, def, next;
4640 if (EDGE_COUNT (exit->dest->preds) > 1)
4641 split_loop_exit_edge (exit);
4643 if (TREE_CODE (stmts) == STATEMENT_LIST)
4645 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
4646 protect_loop_closed_ssa_form (exit, tsi_stmt (tsi));
4648 else
4649 protect_loop_closed_ssa_form (exit, stmts);
4651 /* Ensure there is label in exit->dest, so that we can
4652 insert after it. */
4653 tree_block_label (exit->dest);
4654 bsi = bsi_after_labels (exit->dest);
4655 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4657 if (!op)
4658 return;
4660 for (phi = phi_nodes (exit->dest); phi; phi = next)
4662 next = PHI_CHAIN (phi);
4664 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
4666 def = PHI_RESULT (phi);
4667 remove_statement (phi, false);
4668 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
4669 def, op);
4670 SSA_NAME_DEF_STMT (def) = stmt;
4671 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
4676 /* Rewrites the final value of USE (that is only needed outside of the loop)
4677 using candidate CAND. */
4679 static void
4680 rewrite_use_outer (struct ivopts_data *data,
4681 struct iv_use *use, struct iv_cand *cand)
4683 edge exit;
4684 tree value, op, stmts, tgt;
4685 tree phi;
4687 switch (TREE_CODE (use->stmt))
4689 case PHI_NODE:
4690 tgt = PHI_RESULT (use->stmt);
4691 break;
4692 case MODIFY_EXPR:
4693 tgt = TREE_OPERAND (use->stmt, 0);
4694 break;
4695 default:
4696 gcc_unreachable ();
4699 exit = single_dom_exit (data->current_loop);
4701 if (exit)
4703 if (!cand->iv)
4705 bool ok = may_replace_final_value (data->current_loop, use, &value);
4706 gcc_assert (ok);
4708 else
4709 value = get_computation_at (data->current_loop,
4710 use, cand, last_stmt (exit->src));
4712 value = unshare_expr (value);
4713 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
4715 /* If we will preserve the iv anyway and we would need to perform
4716 some computation to replace the final value, do nothing. */
4717 if (stmts && name_info (data, tgt)->preserve_biv)
4718 return;
4720 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
4722 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
4724 if (USE_FROM_PTR (use_p) == tgt)
4725 SET_USE (use_p, op);
4728 if (stmts)
4729 compute_phi_arg_on_exit (exit, stmts, op);
4731 /* Enable removal of the statement. We cannot remove it directly,
4732 since we may still need the aliasing information attached to the
4733 ssa name defined by it. */
4734 name_info (data, tgt)->iv->have_use_for = false;
4735 return;
4738 /* If the variable is going to be preserved anyway, there is nothing to
4739 do. */
4740 if (name_info (data, tgt)->preserve_biv)
4741 return;
4743 /* Otherwise we just need to compute the iv. */
4744 rewrite_use_nonlinear_expr (data, use, cand);
4747 /* Rewrites USE using candidate CAND. */
4749 static void
4750 rewrite_use (struct ivopts_data *data,
4751 struct iv_use *use, struct iv_cand *cand)
4753 switch (use->type)
4755 case USE_NONLINEAR_EXPR:
4756 rewrite_use_nonlinear_expr (data, use, cand);
4757 break;
4759 case USE_OUTER:
4760 rewrite_use_outer (data, use, cand);
4761 break;
4763 case USE_ADDRESS:
4764 rewrite_use_address (data, use, cand);
4765 break;
4767 case USE_COMPARE:
4768 rewrite_use_compare (data, use, cand);
4769 break;
4771 default:
4772 gcc_unreachable ();
4774 modify_stmt (use->stmt);
4777 /* Rewrite the uses using the selected induction variables. */
4779 static void
4780 rewrite_uses (struct ivopts_data *data)
4782 unsigned i;
4783 struct iv_cand *cand;
4784 struct iv_use *use;
4786 for (i = 0; i < n_iv_uses (data); i++)
4788 use = iv_use (data, i);
4789 cand = use->selected;
4790 gcc_assert (cand);
4792 rewrite_use (data, use, cand);
4796 /* Removes the ivs that are not used after rewriting. */
4798 static void
4799 remove_unused_ivs (struct ivopts_data *data)
4801 unsigned j;
4802 bitmap_iterator bi;
4804 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4806 struct version_info *info;
4808 info = ver_info (data, j);
4809 if (info->iv
4810 && !zero_p (info->iv->step)
4811 && !info->inv_id
4812 && !info->iv->have_use_for
4813 && !info->preserve_biv)
4814 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
4818 /* Frees data allocated by the optimization of a single loop. */
4820 static void
4821 free_loop_data (struct ivopts_data *data)
4823 unsigned i, j;
4824 bitmap_iterator bi;
4826 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
4828 struct version_info *info;
4830 info = ver_info (data, i);
4831 if (info->iv)
4832 free (info->iv);
4833 info->iv = NULL;
4834 info->has_nonlin_use = false;
4835 info->preserve_biv = false;
4836 info->inv_id = 0;
4838 bitmap_clear (data->relevant);
4839 bitmap_clear (data->important_candidates);
4841 for (i = 0; i < n_iv_uses (data); i++)
4843 struct iv_use *use = iv_use (data, i);
4845 free (use->iv);
4846 BITMAP_XFREE (use->related_cands);
4847 for (j = 0; j < use->n_map_members; j++)
4848 if (use->cost_map[j].depends_on)
4849 BITMAP_XFREE (use->cost_map[j].depends_on);
4850 free (use->cost_map);
4851 free (use);
4853 VARRAY_POP_ALL (data->iv_uses);
4855 for (i = 0; i < n_iv_cands (data); i++)
4857 struct iv_cand *cand = iv_cand (data, i);
4859 if (cand->iv)
4860 free (cand->iv);
4861 free (cand);
4863 VARRAY_POP_ALL (data->iv_candidates);
4865 if (data->version_info_size < num_ssa_names)
4867 data->version_info_size = 2 * num_ssa_names;
4868 free (data->version_info);
4869 data->version_info = xcalloc (data->version_info_size,
4870 sizeof (struct version_info));
4873 data->max_inv_id = 0;
4875 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
4877 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
4879 SET_DECL_RTL (obj, NULL_RTX);
4881 VARRAY_POP_ALL (decl_rtl_to_reset);
4884 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
4885 loop tree. */
4887 static void
4888 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
4890 unsigned i;
4892 for (i = 1; i < loops->num; i++)
4893 if (loops->parray[i])
4895 free (loops->parray[i]->aux);
4896 loops->parray[i]->aux = NULL;
4899 free_loop_data (data);
4900 free (data->version_info);
4901 BITMAP_XFREE (data->relevant);
4902 BITMAP_XFREE (data->important_candidates);
4904 VARRAY_FREE (decl_rtl_to_reset);
4905 VARRAY_FREE (data->iv_uses);
4906 VARRAY_FREE (data->iv_candidates);
4909 /* Optimizes the LOOP. Returns true if anything changed. */
4911 static bool
4912 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
4914 bool changed = false;
4915 struct iv_ca *iv_ca;
4916 edge exit;
4918 data->current_loop = loop;
4920 if (dump_file && (dump_flags & TDF_DETAILS))
4922 fprintf (dump_file, "Processing loop %d\n", loop->num);
4924 exit = single_dom_exit (loop);
4925 if (exit)
4927 fprintf (dump_file, " single exit %d -> %d, exit condition ",
4928 exit->src->index, exit->dest->index);
4929 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
4930 fprintf (dump_file, "\n");
4933 fprintf (dump_file, "\n");
4936 /* For each ssa name determines whether it behaves as an induction variable
4937 in some loop. */
4938 if (!find_induction_variables (data))
4939 goto finish;
4941 /* Finds interesting uses (item 1). */
4942 find_interesting_uses (data);
4943 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
4944 goto finish;
4946 /* Finds candidates for the induction variables (item 2). */
4947 find_iv_candidates (data);
4949 /* Calculates the costs (item 3, part 1). */
4950 determine_use_iv_costs (data);
4951 determine_iv_costs (data);
4952 determine_set_costs (data);
4954 /* Find the optimal set of induction variables (item 3, part 2). */
4955 iv_ca = find_optimal_iv_set (data);
4956 if (!iv_ca)
4957 goto finish;
4958 changed = true;
4960 /* Create the new induction variables (item 4, part 1). */
4961 create_new_ivs (data, iv_ca);
4962 iv_ca_free (&iv_ca);
4964 /* Rewrite the uses (item 4, part 2). */
4965 rewrite_uses (data);
4967 /* Remove the ivs that are unused after rewriting. */
4968 remove_unused_ivs (data);
4970 loop_commit_inserts ();
4972 /* We have changed the structure of induction variables; it might happen
4973 that definitions in the scev database refer to some of them that were
4974 eliminated. */
4975 scev_reset ();
4977 finish:
4978 free_loop_data (data);
4980 return changed;
4983 /* Main entry point. Optimizes induction variables in LOOPS. */
4985 void
4986 tree_ssa_iv_optimize (struct loops *loops)
4988 struct loop *loop;
4989 struct ivopts_data data;
4991 tree_ssa_iv_optimize_init (loops, &data);
4993 /* Optimize the loops starting with the innermost ones. */
4994 loop = loops->tree_root;
4995 while (loop->inner)
4996 loop = loop->inner;
4998 #ifdef ENABLE_CHECKING
4999 verify_loop_closed_ssa ();
5000 verify_stmts ();
5001 #endif
5003 /* Scan the loops, inner ones first. */
5004 while (loop != loops->tree_root)
5006 if (dump_file && (dump_flags & TDF_DETAILS))
5007 flow_loop_dump (loop, dump_file, NULL, 1);
5009 tree_ssa_iv_optimize_loop (&data, loop);
5011 if (loop->next)
5013 loop = loop->next;
5014 while (loop->inner)
5015 loop = loop->inner;
5017 else
5018 loop = loop->outer;
5021 #ifdef ENABLE_CHECKING
5022 verify_loop_closed_ssa ();
5023 verify_stmts ();
5024 #endif
5026 tree_ssa_iv_optimize_finalize (loops, &data);