* config/darwin.c, config/darwin.h, config/freebsd-spec.h,
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blobd35ae517767d0e9a595c67db3dc856586104b896
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
92 /* The infinite cost. */
93 #define INFTY 10000000
95 /* The expected number of loop iterations. TODO -- use profiling instead of
96 this. */
97 #define AVG_LOOP_NITER(LOOP) 5
100 /* Representation of the induction variable. */
101 struct iv
103 tree base; /* Initial value of the iv. */
104 tree base_object; /* A memory object to that the induction variable points. */
105 tree step; /* Step of the iv (constant only). */
106 tree ssa_name; /* The ssa name with the value. */
107 bool biv_p; /* Is it a biv? */
108 bool have_use_for; /* Do we already have a use for it? */
109 unsigned use_id; /* The identifier in the use if it is the case. */
112 /* Per-ssa version information (induction variable descriptions, etc.). */
113 struct version_info
115 tree name; /* The ssa name. */
116 struct iv *iv; /* Induction variable description. */
117 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
118 an expression that is not an induction variable. */
119 unsigned inv_id; /* Id of an invariant. */
120 bool preserve_biv; /* For the original biv, whether to preserve it. */
123 /* Information attached to loop. */
124 struct loop_data
126 struct tree_niter_desc niter;
127 /* Number of iterations. */
129 unsigned regs_used; /* Number of registers used. */
132 /* Types of uses. */
133 enum use_type
135 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
136 USE_OUTER, /* The induction variable is used outside the loop. */
137 USE_ADDRESS, /* Use in an address. */
138 USE_COMPARE /* Use is a compare. */
141 /* The candidate - cost pair. */
142 struct cost_pair
144 struct iv_cand *cand; /* The candidate. */
145 unsigned cost; /* The cost. */
146 bitmap depends_on; /* The list of invariants that have to be
147 preserved. */
150 /* Use. */
151 struct iv_use
153 unsigned id; /* The id of the use. */
154 enum use_type type; /* Type of the use. */
155 struct iv *iv; /* The induction variable it is based on. */
156 tree stmt; /* Statement in that it occurs. */
157 tree *op_p; /* The place where it occurs. */
158 bitmap related_cands; /* The set of "related" iv candidates, plus the common
159 important ones. */
161 unsigned n_map_members; /* Number of candidates in the cost_map list. */
162 struct cost_pair *cost_map;
163 /* The costs wrto the iv candidates. */
165 struct iv_cand *selected;
166 /* The selected candidate. */
169 /* The position where the iv is computed. */
170 enum iv_position
172 IP_NORMAL, /* At the end, just before the exit condition. */
173 IP_END, /* At the end of the latch block. */
174 IP_ORIGINAL /* The original biv. */
177 /* The induction variable candidate. */
178 struct iv_cand
180 unsigned id; /* The number of the candidate. */
181 bool important; /* Whether this is an "important" candidate, i.e. such
182 that it should be considered by all uses. */
183 enum iv_position pos; /* Where it is computed. */
184 tree incremented_at; /* For original biv, the statement where it is
185 incremented. */
186 tree var_before; /* The variable used for it before increment. */
187 tree var_after; /* The variable used for it after increment. */
188 struct iv *iv; /* The value of the candidate. NULL for
189 "pseudocandidate" used to indicate the possibility
190 to replace the final value of an iv by direct
191 computation of the value. */
192 unsigned cost; /* Cost of the candidate. */
195 /* The data used by the induction variable optimizations. */
197 struct ivopts_data
199 /* The currently optimized loop. */
200 struct loop *current_loop;
202 /* The size of version_info array allocated. */
203 unsigned version_info_size;
205 /* The array of information for the ssa names. */
206 struct version_info *version_info;
208 /* The bitmap of indices in version_info whose value was changed. */
209 bitmap relevant;
211 /* The maximum invariant id. */
212 unsigned max_inv_id;
214 /* The uses of induction variables. */
215 varray_type iv_uses;
217 /* The candidates. */
218 varray_type iv_candidates;
220 /* A bitmap of important candidates. */
221 bitmap important_candidates;
223 /* Whether to consider just related and important candidates when replacing a
224 use. */
225 bool consider_all_candidates;
228 /* An assignment of iv candidates to uses. */
230 struct iv_ca
232 /* The number of uses covered by the assignment. */
233 unsigned upto;
235 /* Number of uses that cannot be expressed by the candidates in the set. */
236 unsigned bad_uses;
238 /* Candidate assigned to a use, together with the related costs. */
239 struct cost_pair **cand_for_use;
241 /* Number of times each candidate is used. */
242 unsigned *n_cand_uses;
244 /* The candidates used. */
245 bitmap cands;
247 /* The number of candidates in the set. */
248 unsigned n_cands;
250 /* Total number of registers needed. */
251 unsigned n_regs;
253 /* Total cost of expressing uses. */
254 unsigned cand_use_cost;
256 /* Total cost of candidates. */
257 unsigned cand_cost;
259 /* Number of times each invariant is used. */
260 unsigned *n_invariant_uses;
262 /* Total cost of the assignment. */
263 unsigned cost;
266 /* Difference of two iv candidate assignments. */
268 struct iv_ca_delta
270 /* Changed use. */
271 struct iv_use *use;
273 /* An old assignment (for rollback purposes). */
274 struct cost_pair *old_cp;
276 /* A new assignment. */
277 struct cost_pair *new_cp;
279 /* Next change in the list. */
280 struct iv_ca_delta *next_change;
283 /* Bound on number of candidates below that all candidates are considered. */
285 #define CONSIDER_ALL_CANDIDATES_BOUND \
286 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
288 /* If there are more iv occurrences, we just give up (it is quite unlikely that
289 optimizing such a loop would help, and it would take ages). */
291 #define MAX_CONSIDERED_USES \
292 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
294 /* If there are at most this number of ivs in the set, try removing unnecessary
295 ivs from the set always. */
297 #define ALWAYS_PRUNE_CAND_SET_BOUND \
298 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
300 /* The list of trees for that the decl_rtl field must be reset is stored
301 here. */
303 static varray_type decl_rtl_to_reset;
305 /* Number of uses recorded in DATA. */
307 static inline unsigned
308 n_iv_uses (struct ivopts_data *data)
310 return VARRAY_ACTIVE_SIZE (data->iv_uses);
313 /* Ith use recorded in DATA. */
315 static inline struct iv_use *
316 iv_use (struct ivopts_data *data, unsigned i)
318 return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
321 /* Number of candidates recorded in DATA. */
323 static inline unsigned
324 n_iv_cands (struct ivopts_data *data)
326 return VARRAY_ACTIVE_SIZE (data->iv_candidates);
329 /* Ith candidate recorded in DATA. */
331 static inline struct iv_cand *
332 iv_cand (struct ivopts_data *data, unsigned i)
334 return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
337 /* The data for LOOP. */
339 static inline struct loop_data *
340 loop_data (struct loop *loop)
342 return loop->aux;
345 /* The single loop exit if it dominates the latch, NULL otherwise. */
347 static edge
348 single_dom_exit (struct loop *loop)
350 edge exit = loop->single_exit;
352 if (!exit)
353 return NULL;
355 if (!just_once_each_iteration_p (loop, exit->src))
356 return NULL;
358 return exit;
361 /* Dumps information about the induction variable IV to FILE. */
363 extern void dump_iv (FILE *, struct iv *);
364 void
365 dump_iv (FILE *file, struct iv *iv)
367 if (iv->ssa_name)
369 fprintf (file, "ssa name ");
370 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
371 fprintf (file, "\n");
374 fprintf (file, " type ");
375 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
376 fprintf (file, "\n");
378 if (iv->step)
380 fprintf (file, " base ");
381 print_generic_expr (file, iv->base, TDF_SLIM);
382 fprintf (file, "\n");
384 fprintf (file, " step ");
385 print_generic_expr (file, iv->step, TDF_SLIM);
386 fprintf (file, "\n");
388 else
390 fprintf (file, " invariant ");
391 print_generic_expr (file, iv->base, TDF_SLIM);
392 fprintf (file, "\n");
395 if (iv->base_object)
397 fprintf (file, " base object ");
398 print_generic_expr (file, iv->base_object, TDF_SLIM);
399 fprintf (file, "\n");
402 if (iv->biv_p)
403 fprintf (file, " is a biv\n");
406 /* Dumps information about the USE to FILE. */
408 extern void dump_use (FILE *, struct iv_use *);
409 void
410 dump_use (FILE *file, struct iv_use *use)
412 fprintf (file, "use %d\n", use->id);
414 switch (use->type)
416 case USE_NONLINEAR_EXPR:
417 fprintf (file, " generic\n");
418 break;
420 case USE_OUTER:
421 fprintf (file, " outside\n");
422 break;
424 case USE_ADDRESS:
425 fprintf (file, " address\n");
426 break;
428 case USE_COMPARE:
429 fprintf (file, " compare\n");
430 break;
432 default:
433 gcc_unreachable ();
436 fprintf (file, " in statement ");
437 print_generic_expr (file, use->stmt, TDF_SLIM);
438 fprintf (file, "\n");
440 fprintf (file, " at position ");
441 if (use->op_p)
442 print_generic_expr (file, *use->op_p, TDF_SLIM);
443 fprintf (file, "\n");
445 dump_iv (file, use->iv);
447 if (use->related_cands)
449 fprintf (file, " related candidates ");
450 dump_bitmap (file, use->related_cands);
454 /* Dumps information about the uses to FILE. */
456 extern void dump_uses (FILE *, struct ivopts_data *);
457 void
458 dump_uses (FILE *file, struct ivopts_data *data)
460 unsigned i;
461 struct iv_use *use;
463 for (i = 0; i < n_iv_uses (data); i++)
465 use = iv_use (data, i);
467 dump_use (file, use);
468 fprintf (file, "\n");
472 /* Dumps information about induction variable candidate CAND to FILE. */
474 extern void dump_cand (FILE *, struct iv_cand *);
475 void
476 dump_cand (FILE *file, struct iv_cand *cand)
478 struct iv *iv = cand->iv;
480 fprintf (file, "candidate %d%s\n",
481 cand->id, cand->important ? " (important)" : "");
483 if (!iv)
485 fprintf (file, " final value replacement\n");
486 return;
489 switch (cand->pos)
491 case IP_NORMAL:
492 fprintf (file, " incremented before exit test\n");
493 break;
495 case IP_END:
496 fprintf (file, " incremented at end\n");
497 break;
499 case IP_ORIGINAL:
500 fprintf (file, " original biv\n");
501 break;
504 dump_iv (file, iv);
507 /* Returns the info for ssa version VER. */
509 static inline struct version_info *
510 ver_info (struct ivopts_data *data, unsigned ver)
512 return data->version_info + ver;
515 /* Returns the info for ssa name NAME. */
517 static inline struct version_info *
518 name_info (struct ivopts_data *data, tree name)
520 return ver_info (data, SSA_NAME_VERSION (name));
523 /* Checks whether there exists number X such that X * B = A, counting modulo
524 2^BITS. */
526 static bool
527 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
528 HOST_WIDE_INT *x)
530 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
531 unsigned HOST_WIDE_INT inv, ex, val;
532 unsigned i;
534 a &= mask;
535 b &= mask;
537 /* First divide the whole equation by 2 as long as possible. */
538 while (!(a & 1) && !(b & 1))
540 a >>= 1;
541 b >>= 1;
542 bits--;
543 mask >>= 1;
546 if (!(b & 1))
548 /* If b is still even, a is odd and there is no such x. */
549 return false;
552 /* Find the inverse of b. We compute it as
553 b^(2^(bits - 1) - 1) (mod 2^bits). */
554 inv = 1;
555 ex = b;
556 for (i = 0; i < bits - 1; i++)
558 inv = (inv * ex) & mask;
559 ex = (ex * ex) & mask;
562 val = (a * inv) & mask;
564 gcc_assert (((val * b) & mask) == a);
566 if ((val >> (bits - 1)) & 1)
567 val |= ~mask;
569 *x = val;
571 return true;
574 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
575 emitted in LOOP. */
577 static bool
578 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
580 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
582 gcc_assert (bb);
584 if (sbb == loop->latch)
585 return true;
587 if (sbb != bb)
588 return false;
590 return stmt == last_stmt (bb);
593 /* Returns true if STMT if after the place where the original induction
594 variable CAND is incremented. */
596 static bool
597 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
599 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
600 basic_block stmt_bb = bb_for_stmt (stmt);
601 block_stmt_iterator bsi;
603 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
604 return false;
606 if (stmt_bb != cand_bb)
607 return true;
609 /* Scan the block from the end, since the original ivs are usually
610 incremented at the end of the loop body. */
611 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
613 if (bsi_stmt (bsi) == cand->incremented_at)
614 return false;
615 if (bsi_stmt (bsi) == stmt)
616 return true;
620 /* Returns true if STMT if after the place where the induction variable
621 CAND is incremented in LOOP. */
623 static bool
624 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
626 switch (cand->pos)
628 case IP_END:
629 return false;
631 case IP_NORMAL:
632 return stmt_after_ip_normal_pos (loop, stmt);
634 case IP_ORIGINAL:
635 return stmt_after_ip_original_pos (cand, stmt);
637 default:
638 gcc_unreachable ();
642 /* Initializes data structures used by the iv optimization pass, stored
643 in DATA. LOOPS is the loop tree. */
645 static void
646 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
648 unsigned i;
650 data->version_info_size = 2 * num_ssa_names;
651 data->version_info = xcalloc (data->version_info_size,
652 sizeof (struct version_info));
653 data->relevant = BITMAP_XMALLOC ();
654 data->important_candidates = BITMAP_XMALLOC ();
655 data->max_inv_id = 0;
657 for (i = 1; i < loops->num; i++)
658 if (loops->parray[i])
659 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
661 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
662 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
663 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
666 /* Returns a memory object to that EXPR points. In case we are able to
667 determine that it does not point to any such object, NULL is returned. */
669 static tree
670 determine_base_object (tree expr)
672 enum tree_code code = TREE_CODE (expr);
673 tree base, obj, op0, op1;
675 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
676 return NULL_TREE;
678 switch (code)
680 case INTEGER_CST:
681 return NULL_TREE;
683 case ADDR_EXPR:
684 obj = TREE_OPERAND (expr, 0);
685 base = get_base_address (obj);
687 if (!base)
688 return fold_convert (ptr_type_node, expr);
690 if (TREE_CODE (base) == INDIRECT_REF)
691 return fold_convert (ptr_type_node, TREE_OPERAND (base, 0));
693 return fold (build1 (ADDR_EXPR, ptr_type_node, base));
695 case PLUS_EXPR:
696 case MINUS_EXPR:
697 op0 = determine_base_object (TREE_OPERAND (expr, 0));
698 op1 = determine_base_object (TREE_OPERAND (expr, 1));
700 if (!op1)
701 return op0;
703 if (!op0)
704 return (code == PLUS_EXPR
705 ? op1
706 : fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));
708 return fold (build (code, ptr_type_node, op0, op1));
710 default:
711 return fold_convert (ptr_type_node, expr);
715 /* Allocates an induction variable with given initial value BASE and step STEP
716 for loop LOOP. */
718 static struct iv *
719 alloc_iv (tree base, tree step)
721 struct iv *iv = xcalloc (1, sizeof (struct iv));
723 if (step && integer_zerop (step))
724 step = NULL_TREE;
726 iv->base = base;
727 iv->base_object = determine_base_object (base);
728 iv->step = step;
729 iv->biv_p = false;
730 iv->have_use_for = false;
731 iv->use_id = 0;
732 iv->ssa_name = NULL_TREE;
734 return iv;
737 /* Sets STEP and BASE for induction variable IV. */
739 static void
740 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
742 struct version_info *info = name_info (data, iv);
744 gcc_assert (!info->iv);
746 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
747 info->iv = alloc_iv (base, step);
748 info->iv->ssa_name = iv;
751 /* Finds induction variable declaration for VAR. */
753 static struct iv *
754 get_iv (struct ivopts_data *data, tree var)
756 basic_block bb;
758 if (!name_info (data, var)->iv)
760 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
762 if (!bb
763 || !flow_bb_inside_loop_p (data->current_loop, bb))
764 set_iv (data, var, var, NULL_TREE);
767 return name_info (data, var)->iv;
770 /* Determines the step of a biv defined in PHI. */
772 static tree
773 determine_biv_step (tree phi)
775 struct loop *loop = bb_for_stmt (phi)->loop_father;
776 tree name = PHI_RESULT (phi), base, step;
777 tree type = TREE_TYPE (name);
779 if (!is_gimple_reg (name))
780 return NULL_TREE;
782 if (!simple_iv (loop, phi, name, &base, &step))
783 return NULL_TREE;
785 if (!step)
786 return build_int_cst (type, 0);
788 return step;
791 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
793 static bool
794 abnormal_ssa_name_p (tree exp)
796 if (!exp)
797 return false;
799 if (TREE_CODE (exp) != SSA_NAME)
800 return false;
802 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
805 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
806 abnormal phi node. Callback for for_each_index. */
808 static bool
809 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
810 void *data ATTRIBUTE_UNUSED)
812 if (TREE_CODE (base) == ARRAY_REF)
814 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
815 return false;
816 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
817 return false;
820 return !abnormal_ssa_name_p (*index);
823 /* Returns true if EXPR contains a ssa name that occurs in an
824 abnormal phi node. */
826 static bool
827 contains_abnormal_ssa_name_p (tree expr)
829 enum tree_code code = TREE_CODE (expr);
830 enum tree_code_class class = TREE_CODE_CLASS (code);
832 if (code == SSA_NAME)
833 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
835 if (code == INTEGER_CST
836 || is_gimple_min_invariant (expr))
837 return false;
839 if (code == ADDR_EXPR)
840 return !for_each_index (&TREE_OPERAND (expr, 0),
841 idx_contains_abnormal_ssa_name_p,
842 NULL);
844 switch (class)
846 case tcc_binary:
847 case tcc_comparison:
848 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
849 return true;
851 /* Fallthru. */
852 case tcc_unary:
853 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
854 return true;
856 break;
858 default:
859 gcc_unreachable ();
862 return false;
865 /* Finds basic ivs. */
867 static bool
868 find_bivs (struct ivopts_data *data)
870 tree phi, step, type, base;
871 bool found = false;
872 struct loop *loop = data->current_loop;
874 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
876 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
877 continue;
879 step = determine_biv_step (phi);
881 if (!step)
882 continue;
883 if (cst_and_fits_in_hwi (step)
884 && int_cst_value (step) == 0)
885 continue;
887 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
888 if (contains_abnormal_ssa_name_p (base))
889 continue;
891 type = TREE_TYPE (PHI_RESULT (phi));
892 base = fold_convert (type, base);
893 step = fold_convert (type, step);
895 /* FIXME: We do not handle induction variables whose step does
896 not satisfy cst_and_fits_in_hwi. */
897 if (!cst_and_fits_in_hwi (step))
898 continue;
900 set_iv (data, PHI_RESULT (phi), base, step);
901 found = true;
904 return found;
907 /* Marks basic ivs. */
909 static void
910 mark_bivs (struct ivopts_data *data)
912 tree phi, var;
913 struct iv *iv, *incr_iv;
914 struct loop *loop = data->current_loop;
915 basic_block incr_bb;
917 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
919 iv = get_iv (data, PHI_RESULT (phi));
920 if (!iv)
921 continue;
923 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
924 incr_iv = get_iv (data, var);
925 if (!incr_iv)
926 continue;
928 /* If the increment is in the subloop, ignore it. */
929 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
930 if (incr_bb->loop_father != data->current_loop
931 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
932 continue;
934 iv->biv_p = true;
935 incr_iv->biv_p = true;
939 /* Checks whether STMT defines a linear induction variable and stores its
940 parameters to BASE and STEP. */
942 static bool
943 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
944 tree *base, tree *step)
946 tree lhs;
947 struct loop *loop = data->current_loop;
949 *base = NULL_TREE;
950 *step = NULL_TREE;
952 if (TREE_CODE (stmt) != MODIFY_EXPR)
953 return false;
955 lhs = TREE_OPERAND (stmt, 0);
956 if (TREE_CODE (lhs) != SSA_NAME)
957 return false;
959 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
960 return false;
962 /* FIXME: We do not handle induction variables whose step does
963 not satisfy cst_and_fits_in_hwi. */
964 if (!zero_p (*step)
965 && !cst_and_fits_in_hwi (*step))
966 return false;
968 if (contains_abnormal_ssa_name_p (*base))
969 return false;
971 return true;
974 /* Finds general ivs in statement STMT. */
976 static void
977 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
979 tree base, step;
981 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
982 return;
984 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
987 /* Finds general ivs in basic block BB. */
989 static void
990 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
992 block_stmt_iterator bsi;
994 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
995 find_givs_in_stmt (data, bsi_stmt (bsi));
998 /* Finds general ivs. */
1000 static void
1001 find_givs (struct ivopts_data *data)
1003 struct loop *loop = data->current_loop;
1004 basic_block *body = get_loop_body_in_dom_order (loop);
1005 unsigned i;
1007 for (i = 0; i < loop->num_nodes; i++)
1008 find_givs_in_bb (data, body[i]);
1009 free (body);
1012 /* Determine the number of iterations of the current loop. */
1014 static void
1015 determine_number_of_iterations (struct ivopts_data *data)
1017 struct loop *loop = data->current_loop;
1018 edge exit = single_dom_exit (loop);
1020 if (!exit)
1021 return;
1023 number_of_iterations_exit (loop, exit, &loop_data (loop)->niter);
1026 /* For each ssa name defined in LOOP determines whether it is an induction
1027 variable and if so, its initial value and step. */
1029 static bool
1030 find_induction_variables (struct ivopts_data *data)
1032 unsigned i;
1033 struct loop *loop = data->current_loop;
1034 bitmap_iterator bi;
1036 if (!find_bivs (data))
1037 return false;
1039 find_givs (data);
1040 mark_bivs (data);
1041 determine_number_of_iterations (data);
1043 if (dump_file && (dump_flags & TDF_DETAILS))
1045 if (loop_data (loop)->niter.niter)
1047 fprintf (dump_file, " number of iterations ");
1048 print_generic_expr (dump_file, loop_data (loop)->niter.niter,
1049 TDF_SLIM);
1050 fprintf (dump_file, "\n");
1052 fprintf (dump_file, " may be zero if ");
1053 print_generic_expr (dump_file, loop_data (loop)->niter.may_be_zero,
1054 TDF_SLIM);
1055 fprintf (dump_file, "\n");
1057 fprintf (dump_file, " bogus unless ");
1058 print_generic_expr (dump_file, loop_data (loop)->niter.assumptions,
1059 TDF_SLIM);
1060 fprintf (dump_file, "\n");
1061 fprintf (dump_file, "\n");
1064 fprintf (dump_file, "Induction variables:\n\n");
1066 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1068 if (ver_info (data, i)->iv)
1069 dump_iv (dump_file, ver_info (data, i)->iv);
1073 return true;
1076 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1078 static struct iv_use *
1079 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1080 tree stmt, enum use_type use_type)
1082 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1084 use->id = n_iv_uses (data);
1085 use->type = use_type;
1086 use->iv = iv;
1087 use->stmt = stmt;
1088 use->op_p = use_p;
1089 use->related_cands = BITMAP_XMALLOC ();
1091 /* To avoid showing ssa name in the dumps, if it was not reset by the
1092 caller. */
1093 iv->ssa_name = NULL_TREE;
1095 if (dump_file && (dump_flags & TDF_DETAILS))
1096 dump_use (dump_file, use);
1098 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
1100 return use;
1103 /* Checks whether OP is a loop-level invariant and if so, records it.
1104 NONLINEAR_USE is true if the invariant is used in a way we do not
1105 handle specially. */
1107 static void
1108 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1110 basic_block bb;
1111 struct version_info *info;
1113 if (TREE_CODE (op) != SSA_NAME
1114 || !is_gimple_reg (op))
1115 return;
1117 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1118 if (bb
1119 && flow_bb_inside_loop_p (data->current_loop, bb))
1120 return;
1122 info = name_info (data, op);
1123 info->name = op;
1124 info->has_nonlin_use |= nonlinear_use;
1125 if (!info->inv_id)
1126 info->inv_id = ++data->max_inv_id;
1127 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1130 /* Checks whether the use OP is interesting and if so, records it
1131 as TYPE. */
1133 static struct iv_use *
1134 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1135 enum use_type type)
1137 struct iv *iv;
1138 struct iv *civ;
1139 tree stmt;
1140 struct iv_use *use;
1142 if (TREE_CODE (op) != SSA_NAME)
1143 return NULL;
1145 iv = get_iv (data, op);
1146 if (!iv)
1147 return NULL;
1149 if (iv->have_use_for)
1151 use = iv_use (data, iv->use_id);
1153 gcc_assert (use->type == USE_NONLINEAR_EXPR
1154 || use->type == USE_OUTER);
1156 if (type == USE_NONLINEAR_EXPR)
1157 use->type = USE_NONLINEAR_EXPR;
1158 return use;
1161 if (zero_p (iv->step))
1163 record_invariant (data, op, true);
1164 return NULL;
1166 iv->have_use_for = true;
1168 civ = xmalloc (sizeof (struct iv));
1169 *civ = *iv;
1171 stmt = SSA_NAME_DEF_STMT (op);
1172 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1173 || TREE_CODE (stmt) == MODIFY_EXPR);
1175 use = record_use (data, NULL, civ, stmt, type);
1176 iv->use_id = use->id;
1178 return use;
1181 /* Checks whether the use OP is interesting and if so, records it. */
1183 static struct iv_use *
1184 find_interesting_uses_op (struct ivopts_data *data, tree op)
1186 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1189 /* Records a definition of induction variable OP that is used outside of the
1190 loop. */
1192 static struct iv_use *
1193 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1195 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1198 /* Checks whether the condition *COND_P in STMT is interesting
1199 and if so, records it. */
1201 static void
1202 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1204 tree *op0_p;
1205 tree *op1_p;
1206 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1207 struct iv const_iv;
1208 tree zero = integer_zero_node;
1210 const_iv.step = NULL_TREE;
1212 if (integer_zerop (*cond_p)
1213 || integer_nonzerop (*cond_p))
1214 return;
1216 if (TREE_CODE (*cond_p) == SSA_NAME)
1218 op0_p = cond_p;
1219 op1_p = &zero;
1221 else
1223 op0_p = &TREE_OPERAND (*cond_p, 0);
1224 op1_p = &TREE_OPERAND (*cond_p, 1);
1227 if (TREE_CODE (*op0_p) == SSA_NAME)
1228 iv0 = get_iv (data, *op0_p);
1229 else
1230 iv0 = &const_iv;
1232 if (TREE_CODE (*op1_p) == SSA_NAME)
1233 iv1 = get_iv (data, *op1_p);
1234 else
1235 iv1 = &const_iv;
1237 if (/* When comparing with non-invariant value, we may not do any senseful
1238 induction variable elimination. */
1239 (!iv0 || !iv1)
1240 /* Eliminating condition based on two ivs would be nontrivial.
1241 ??? TODO -- it is not really important to handle this case. */
1242 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1244 find_interesting_uses_op (data, *op0_p);
1245 find_interesting_uses_op (data, *op1_p);
1246 return;
1249 if (zero_p (iv0->step) && zero_p (iv1->step))
1251 /* If both are invariants, this is a work for unswitching. */
1252 return;
1255 civ = xmalloc (sizeof (struct iv));
1256 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1257 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1260 /* Returns true if expression EXPR is obviously invariant in LOOP,
1261 i.e. if all its operands are defined outside of the LOOP. */
1263 bool
1264 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1266 basic_block def_bb;
1267 unsigned i, len;
1269 if (is_gimple_min_invariant (expr))
1270 return true;
1272 if (TREE_CODE (expr) == SSA_NAME)
1274 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1275 if (def_bb
1276 && flow_bb_inside_loop_p (loop, def_bb))
1277 return false;
1279 return true;
1282 if (!EXPR_P (expr))
1283 return false;
1285 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1286 for (i = 0; i < len; i++)
1287 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1288 return false;
1290 return true;
1293 /* Cumulates the steps of indices into DATA and replaces their values with the
1294 initial ones. Returns false when the value of the index cannot be determined.
1295 Callback for for_each_index. */
1297 struct ifs_ivopts_data
1299 struct ivopts_data *ivopts_data;
1300 tree stmt;
1301 tree *step_p;
1304 static bool
1305 idx_find_step (tree base, tree *idx, void *data)
1307 struct ifs_ivopts_data *dta = data;
1308 struct iv *iv;
1309 tree step, type, iv_type, iv_step, lbound, off;
1310 struct loop *loop = dta->ivopts_data->current_loop;
1312 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1313 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1314 return false;
1316 /* If base is a component ref, require that the offset of the reference
1317 be invariant. */
1318 if (TREE_CODE (base) == COMPONENT_REF)
1320 off = component_ref_field_offset (base);
1321 return expr_invariant_in_loop_p (loop, off);
1324 /* If base is array, first check whether we will be able to move the
1325 reference out of the loop (in order to take its address in strength
1326 reduction). In order for this to work we need both lower bound
1327 and step to be loop invariants. */
1328 if (TREE_CODE (base) == ARRAY_REF)
1330 step = array_ref_element_size (base);
1331 lbound = array_ref_low_bound (base);
1333 if (!expr_invariant_in_loop_p (loop, step)
1334 || !expr_invariant_in_loop_p (loop, lbound))
1335 return false;
1338 if (TREE_CODE (*idx) != SSA_NAME)
1339 return true;
1341 iv = get_iv (dta->ivopts_data, *idx);
1342 if (!iv)
1343 return false;
1345 *idx = iv->base;
1347 if (!iv->step)
1348 return true;
1350 iv_type = TREE_TYPE (iv->base);
1351 type = build_pointer_type (TREE_TYPE (base));
1352 if (TREE_CODE (base) == ARRAY_REF)
1354 step = array_ref_element_size (base);
1356 /* We only handle addresses whose step is an integer constant. */
1357 if (TREE_CODE (step) != INTEGER_CST)
1358 return false;
1360 else
1361 /* The step for pointer arithmetics already is 1 byte. */
1362 step = build_int_cst (type, 1);
1364 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1365 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1366 type, iv->base, iv->step, dta->stmt);
1367 else
1368 iv_step = fold_convert (iv_type, iv->step);
1370 if (!iv_step)
1372 /* The index might wrap. */
1373 return false;
1376 step = fold_binary_to_constant (MULT_EXPR, type, step, iv_step);
1378 if (!*dta->step_p)
1379 *dta->step_p = step;
1380 else
1381 *dta->step_p = fold_binary_to_constant (PLUS_EXPR, type,
1382 *dta->step_p, step);
1384 return true;
1387 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1388 object is passed to it in DATA. */
1390 static bool
1391 idx_record_use (tree base, tree *idx,
1392 void *data)
1394 find_interesting_uses_op (data, *idx);
1395 if (TREE_CODE (base) == ARRAY_REF)
1397 find_interesting_uses_op (data, array_ref_element_size (base));
1398 find_interesting_uses_op (data, array_ref_low_bound (base));
1400 return true;
1403 /* Returns true if memory reference REF may be unaligned. */
1405 static bool
1406 may_be_unaligned_p (tree ref)
1408 tree base;
1409 tree base_type;
1410 HOST_WIDE_INT bitsize;
1411 HOST_WIDE_INT bitpos;
1412 tree toffset;
1413 enum machine_mode mode;
1414 int unsignedp, volatilep;
1415 unsigned base_align;
1417 /* The test below is basically copy of what expr.c:normal_inner_ref
1418 does to check whether the object must be loaded by parts when
1419 STRICT_ALIGNMENT is true. */
1420 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1421 &unsignedp, &volatilep, true);
1422 base_type = TREE_TYPE (base);
1423 base_align = TYPE_ALIGN (base_type);
1425 if (mode != BLKmode
1426 && (base_align < GET_MODE_ALIGNMENT (mode)
1427 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1428 || bitpos % BITS_PER_UNIT != 0))
1429 return true;
1431 return false;
1434 /* Finds addresses in *OP_P inside STMT. */
1436 static void
1437 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1439 tree base = unshare_expr (*op_p), step = NULL;
1440 struct iv *civ;
1441 struct ifs_ivopts_data ifs_ivopts_data;
1443 /* Ignore bitfields for now. Not really something terribly complicated
1444 to handle. TODO. */
1445 if (TREE_CODE (base) == COMPONENT_REF
1446 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1447 goto fail;
1449 if (STRICT_ALIGNMENT
1450 && may_be_unaligned_p (base))
1451 goto fail;
1453 ifs_ivopts_data.ivopts_data = data;
1454 ifs_ivopts_data.stmt = stmt;
1455 ifs_ivopts_data.step_p = &step;
1456 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1457 || zero_p (step))
1458 goto fail;
1460 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1461 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1463 if (TREE_CODE (base) == INDIRECT_REF)
1464 base = TREE_OPERAND (base, 0);
1465 else
1466 base = build_addr (base);
1468 civ = alloc_iv (base, step);
1469 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1470 return;
1472 fail:
1473 for_each_index (op_p, idx_record_use, data);
1476 /* Finds and records invariants used in STMT. */
1478 static void
1479 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1481 use_optype uses = NULL;
1482 unsigned i, n;
1483 tree op;
1485 if (TREE_CODE (stmt) == PHI_NODE)
1486 n = PHI_NUM_ARGS (stmt);
1487 else
1489 get_stmt_operands (stmt);
1490 uses = STMT_USE_OPS (stmt);
1491 n = NUM_USES (uses);
1494 for (i = 0; i < n; i++)
1496 if (TREE_CODE (stmt) == PHI_NODE)
1497 op = PHI_ARG_DEF (stmt, i);
1498 else
1499 op = USE_OP (uses, i);
1501 record_invariant (data, op, false);
1505 /* Finds interesting uses of induction variables in the statement STMT. */
1507 static void
1508 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1510 struct iv *iv;
1511 tree op, lhs, rhs;
1512 use_optype uses = NULL;
1513 unsigned i, n;
1515 find_invariants_stmt (data, stmt);
1517 if (TREE_CODE (stmt) == COND_EXPR)
1519 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1520 return;
1523 if (TREE_CODE (stmt) == MODIFY_EXPR)
1525 lhs = TREE_OPERAND (stmt, 0);
1526 rhs = TREE_OPERAND (stmt, 1);
1528 if (TREE_CODE (lhs) == SSA_NAME)
1530 /* If the statement defines an induction variable, the uses are not
1531 interesting by themselves. */
1533 iv = get_iv (data, lhs);
1535 if (iv && !zero_p (iv->step))
1536 return;
1539 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1541 case tcc_comparison:
1542 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1543 return;
1545 case tcc_reference:
1546 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1547 if (REFERENCE_CLASS_P (lhs))
1548 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1549 return;
1551 default: ;
1554 if (REFERENCE_CLASS_P (lhs)
1555 && is_gimple_val (rhs))
1557 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1558 find_interesting_uses_op (data, rhs);
1559 return;
1562 /* TODO -- we should also handle address uses of type
1564 memory = call (whatever);
1568 call (memory). */
1571 if (TREE_CODE (stmt) == PHI_NODE
1572 && bb_for_stmt (stmt) == data->current_loop->header)
1574 lhs = PHI_RESULT (stmt);
1575 iv = get_iv (data, lhs);
1577 if (iv && !zero_p (iv->step))
1578 return;
1581 if (TREE_CODE (stmt) == PHI_NODE)
1582 n = PHI_NUM_ARGS (stmt);
1583 else
1585 uses = STMT_USE_OPS (stmt);
1586 n = NUM_USES (uses);
1589 for (i = 0; i < n; i++)
1591 if (TREE_CODE (stmt) == PHI_NODE)
1592 op = PHI_ARG_DEF (stmt, i);
1593 else
1594 op = USE_OP (uses, i);
1596 if (TREE_CODE (op) != SSA_NAME)
1597 continue;
1599 iv = get_iv (data, op);
1600 if (!iv)
1601 continue;
1603 find_interesting_uses_op (data, op);
1607 /* Finds interesting uses of induction variables outside of loops
1608 on loop exit edge EXIT. */
1610 static void
1611 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1613 tree phi, def;
1615 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1617 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1618 find_interesting_uses_outer (data, def);
1622 /* Finds uses of the induction variables that are interesting. */
1624 static void
1625 find_interesting_uses (struct ivopts_data *data)
1627 basic_block bb;
1628 block_stmt_iterator bsi;
1629 tree phi;
1630 basic_block *body = get_loop_body (data->current_loop);
1631 unsigned i;
1632 struct version_info *info;
1633 edge e;
1635 if (dump_file && (dump_flags & TDF_DETAILS))
1636 fprintf (dump_file, "Uses:\n\n");
1638 for (i = 0; i < data->current_loop->num_nodes; i++)
1640 edge_iterator ei;
1641 bb = body[i];
1643 FOR_EACH_EDGE (e, ei, bb->succs)
1644 if (e->dest != EXIT_BLOCK_PTR
1645 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1646 find_interesting_uses_outside (data, e);
1648 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1649 find_interesting_uses_stmt (data, phi);
1650 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1651 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1654 if (dump_file && (dump_flags & TDF_DETAILS))
1656 bitmap_iterator bi;
1658 fprintf (dump_file, "\n");
1660 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1662 info = ver_info (data, i);
1663 if (info->inv_id)
1665 fprintf (dump_file, " ");
1666 print_generic_expr (dump_file, info->name, TDF_SLIM);
1667 fprintf (dump_file, " is invariant (%d)%s\n",
1668 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1672 fprintf (dump_file, "\n");
1675 free (body);
1678 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1679 position to POS. If USE is not NULL, the candidate is set as related to
1680 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1681 replacement of the final value of the iv by a direct computation. */
1683 static struct iv_cand *
1684 add_candidate_1 (struct ivopts_data *data,
1685 tree base, tree step, bool important, enum iv_position pos,
1686 struct iv_use *use, tree incremented_at)
1688 unsigned i;
1689 struct iv_cand *cand = NULL;
1690 tree type;
1692 if (base)
1694 type = TREE_TYPE (base);
1695 if (!TYPE_UNSIGNED (type))
1697 type = unsigned_type_for (type);
1698 base = fold_convert (type, base);
1699 if (step)
1700 step = fold_convert (type, step);
1704 for (i = 0; i < n_iv_cands (data); i++)
1706 cand = iv_cand (data, i);
1708 if (cand->pos != pos)
1709 continue;
1711 if (cand->incremented_at != incremented_at)
1712 continue;
1714 if (!cand->iv)
1716 if (!base && !step)
1717 break;
1719 continue;
1722 if (!base && !step)
1723 continue;
1725 if (!operand_equal_p (base, cand->iv->base, 0))
1726 continue;
1728 if (zero_p (cand->iv->step))
1730 if (zero_p (step))
1731 break;
1733 else
1735 if (step && operand_equal_p (step, cand->iv->step, 0))
1736 break;
1740 if (i == n_iv_cands (data))
1742 cand = xcalloc (1, sizeof (struct iv_cand));
1743 cand->id = i;
1745 if (!base && !step)
1746 cand->iv = NULL;
1747 else
1748 cand->iv = alloc_iv (base, step);
1750 cand->pos = pos;
1751 if (pos != IP_ORIGINAL && cand->iv)
1753 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1754 cand->var_after = cand->var_before;
1756 cand->important = important;
1757 cand->incremented_at = incremented_at;
1758 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1760 if (dump_file && (dump_flags & TDF_DETAILS))
1761 dump_cand (dump_file, cand);
1764 if (important && !cand->important)
1766 cand->important = true;
1767 if (dump_file && (dump_flags & TDF_DETAILS))
1768 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1771 if (use)
1773 bitmap_set_bit (use->related_cands, i);
1774 if (dump_file && (dump_flags & TDF_DETAILS))
1775 fprintf (dump_file, "Candidate %d is related to use %d\n",
1776 cand->id, use->id);
1779 return cand;
1782 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1783 position to POS. If USE is not NULL, the candidate is set as related to
1784 it. The candidate computation is scheduled on all available positions. */
1786 static void
1787 add_candidate (struct ivopts_data *data,
1788 tree base, tree step, bool important, struct iv_use *use)
1790 if (ip_normal_pos (data->current_loop))
1791 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
1792 if (ip_end_pos (data->current_loop))
1793 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
1796 /* Adds standard iv candidates. */
1798 static void
1799 add_standard_iv_candidates (struct ivopts_data *data)
1801 /* Add 0 + 1 * iteration candidate. */
1802 add_candidate (data,
1803 build_int_cst (unsigned_intSI_type_node, 0),
1804 build_int_cst (unsigned_intSI_type_node, 1),
1805 true, NULL);
1807 /* The same for a long type if it is still fast enough. */
1808 if (BITS_PER_WORD > 32)
1809 add_candidate (data,
1810 build_int_cst (unsigned_intDI_type_node, 0),
1811 build_int_cst (unsigned_intDI_type_node, 1),
1812 true, NULL);
1816 /* Adds candidates bases on the old induction variable IV. */
1818 static void
1819 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
1821 tree phi, def;
1822 struct iv_cand *cand;
1824 add_candidate (data, iv->base, iv->step, true, NULL);
1826 /* The same, but with initial value zero. */
1827 add_candidate (data,
1828 build_int_cst (TREE_TYPE (iv->base), 0),
1829 iv->step, true, NULL);
1831 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
1832 if (TREE_CODE (phi) == PHI_NODE)
1834 /* Additionally record the possibility of leaving the original iv
1835 untouched. */
1836 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
1837 cand = add_candidate_1 (data,
1838 iv->base, iv->step, true, IP_ORIGINAL, NULL,
1839 SSA_NAME_DEF_STMT (def));
1840 cand->var_before = iv->ssa_name;
1841 cand->var_after = def;
1845 /* Adds candidates based on the old induction variables. */
1847 static void
1848 add_old_ivs_candidates (struct ivopts_data *data)
1850 unsigned i;
1851 struct iv *iv;
1852 bitmap_iterator bi;
1854 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1856 iv = ver_info (data, i)->iv;
1857 if (iv && iv->biv_p && !zero_p (iv->step))
1858 add_old_iv_candidates (data, iv);
1862 /* Adds candidates based on the value of the induction variable IV and USE. */
1864 static void
1865 add_iv_value_candidates (struct ivopts_data *data,
1866 struct iv *iv, struct iv_use *use)
1868 add_candidate (data, iv->base, iv->step, false, use);
1870 /* The same, but with initial value zero. */
1871 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
1872 iv->step, false, use);
1875 /* Adds candidates based on the address IV and USE. */
1877 static void
1878 add_address_candidates (struct ivopts_data *data,
1879 struct iv *iv, struct iv_use *use)
1881 tree base, abase, tmp, *act;
1883 /* First, the trivial choices. */
1884 add_iv_value_candidates (data, iv, use);
1886 /* Second, try removing the COMPONENT_REFs. */
1887 if (TREE_CODE (iv->base) == ADDR_EXPR)
1889 base = TREE_OPERAND (iv->base, 0);
1890 while (TREE_CODE (base) == COMPONENT_REF
1891 || (TREE_CODE (base) == ARRAY_REF
1892 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
1893 base = TREE_OPERAND (base, 0);
1895 if (base != TREE_OPERAND (iv->base, 0))
1897 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1898 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1900 if (TREE_CODE (base) == INDIRECT_REF)
1901 base = TREE_OPERAND (base, 0);
1902 else
1903 base = build_addr (base);
1904 add_candidate (data, base, iv->step, false, use);
1908 /* Third, try removing the constant offset. */
1909 abase = iv->base;
1910 while (TREE_CODE (abase) == PLUS_EXPR
1911 && TREE_CODE (TREE_OPERAND (abase, 1)) != INTEGER_CST)
1912 abase = TREE_OPERAND (abase, 0);
1913 /* We found the offset, so make the copy of the non-shared part and
1914 remove it. */
1915 if (TREE_CODE (abase) == PLUS_EXPR)
1917 tmp = iv->base;
1918 act = &base;
1920 for (tmp = iv->base; tmp != abase; tmp = TREE_OPERAND (tmp, 0))
1922 *act = build2 (PLUS_EXPR, TREE_TYPE (tmp),
1923 NULL_TREE, TREE_OPERAND (tmp, 1));
1924 act = &TREE_OPERAND (*act, 0);
1926 *act = TREE_OPERAND (tmp, 0);
1928 add_candidate (data, base, iv->step, false, use);
1932 /* Possibly adds pseudocandidate for replacing the final value of USE by
1933 a direct computation. */
1935 static void
1936 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
1938 struct tree_niter_desc *niter;
1939 struct loop *loop = data->current_loop;
1941 /* We must know where we exit the loop and how many times does it roll. */
1942 if (!single_dom_exit (loop))
1943 return;
1945 niter = &loop_data (loop)->niter;
1946 if (!niter->niter
1947 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
1948 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
1949 return;
1951 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
1954 /* Adds candidates based on the uses. */
1956 static void
1957 add_derived_ivs_candidates (struct ivopts_data *data)
1959 unsigned i;
1961 for (i = 0; i < n_iv_uses (data); i++)
1963 struct iv_use *use = iv_use (data, i);
1965 if (!use)
1966 continue;
1968 switch (use->type)
1970 case USE_NONLINEAR_EXPR:
1971 case USE_COMPARE:
1972 /* Just add the ivs based on the value of the iv used here. */
1973 add_iv_value_candidates (data, use->iv, use);
1974 break;
1976 case USE_OUTER:
1977 add_iv_value_candidates (data, use->iv, use);
1979 /* Additionally, add the pseudocandidate for the possibility to
1980 replace the final value by a direct computation. */
1981 add_iv_outer_candidates (data, use);
1982 break;
1984 case USE_ADDRESS:
1985 add_address_candidates (data, use->iv, use);
1986 break;
1988 default:
1989 gcc_unreachable ();
1994 /* Record important candidates and add them to related_cands bitmaps
1995 if needed. */
1997 static void
1998 record_important_candidates (struct ivopts_data *data)
2000 unsigned i;
2001 struct iv_use *use;
2003 for (i = 0; i < n_iv_cands (data); i++)
2005 struct iv_cand *cand = iv_cand (data, i);
2007 if (cand->important)
2008 bitmap_set_bit (data->important_candidates, i);
2011 data->consider_all_candidates = (n_iv_cands (data)
2012 <= CONSIDER_ALL_CANDIDATES_BOUND);
2014 if (data->consider_all_candidates)
2016 /* We will not need "related_cands" bitmaps in this case,
2017 so release them to decrease peak memory consumption. */
2018 for (i = 0; i < n_iv_uses (data); i++)
2020 use = iv_use (data, i);
2021 BITMAP_XFREE (use->related_cands);
2024 else
2026 /* Add important candidates to the related_cands bitmaps. */
2027 for (i = 0; i < n_iv_uses (data); i++)
2028 bitmap_ior_into (iv_use (data, i)->related_cands,
2029 data->important_candidates);
2033 /* Finds the candidates for the induction variables. */
2035 static void
2036 find_iv_candidates (struct ivopts_data *data)
2038 /* Add commonly used ivs. */
2039 add_standard_iv_candidates (data);
2041 /* Add old induction variables. */
2042 add_old_ivs_candidates (data);
2044 /* Add induction variables derived from uses. */
2045 add_derived_ivs_candidates (data);
2047 /* Record the important candidates. */
2048 record_important_candidates (data);
2051 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2052 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2053 we allocate a simple list to every use. */
2055 static void
2056 alloc_use_cost_map (struct ivopts_data *data)
2058 unsigned i, size, s, j;
2060 for (i = 0; i < n_iv_uses (data); i++)
2062 struct iv_use *use = iv_use (data, i);
2063 bitmap_iterator bi;
2065 if (data->consider_all_candidates)
2066 size = n_iv_cands (data);
2067 else
2069 s = 0;
2070 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2072 s++;
2075 /* Round up to the power of two, so that moduling by it is fast. */
2076 for (size = 1; size < s; size <<= 1)
2077 continue;
2080 use->n_map_members = size;
2081 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2085 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2086 on invariants DEPENDS_ON. */
2088 static void
2089 set_use_iv_cost (struct ivopts_data *data,
2090 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2091 bitmap depends_on)
2093 unsigned i, s;
2095 if (cost == INFTY)
2097 BITMAP_XFREE (depends_on);
2098 return;
2101 if (data->consider_all_candidates)
2103 use->cost_map[cand->id].cand = cand;
2104 use->cost_map[cand->id].cost = cost;
2105 use->cost_map[cand->id].depends_on = depends_on;
2106 return;
2109 /* n_map_members is a power of two, so this computes modulo. */
2110 s = cand->id & (use->n_map_members - 1);
2111 for (i = s; i < use->n_map_members; i++)
2112 if (!use->cost_map[i].cand)
2113 goto found;
2114 for (i = 0; i < s; i++)
2115 if (!use->cost_map[i].cand)
2116 goto found;
2118 gcc_unreachable ();
2120 found:
2121 use->cost_map[i].cand = cand;
2122 use->cost_map[i].cost = cost;
2123 use->cost_map[i].depends_on = depends_on;
2126 /* Gets cost of (USE, CANDIDATE) pair. */
2128 static struct cost_pair *
2129 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2130 struct iv_cand *cand)
2132 unsigned i, s;
2133 struct cost_pair *ret;
2135 if (!cand)
2136 return NULL;
2138 if (data->consider_all_candidates)
2140 ret = use->cost_map + cand->id;
2141 if (!ret->cand)
2142 return NULL;
2144 return ret;
2147 /* n_map_members is a power of two, so this computes modulo. */
2148 s = cand->id & (use->n_map_members - 1);
2149 for (i = s; i < use->n_map_members; i++)
2150 if (use->cost_map[i].cand == cand)
2151 return use->cost_map + i;
2153 for (i = 0; i < s; i++)
2154 if (use->cost_map[i].cand == cand)
2155 return use->cost_map + i;
2157 return NULL;
2160 /* Returns estimate on cost of computing SEQ. */
2162 static unsigned
2163 seq_cost (rtx seq)
2165 unsigned cost = 0;
2166 rtx set;
2168 for (; seq; seq = NEXT_INSN (seq))
2170 set = single_set (seq);
2171 if (set)
2172 cost += rtx_cost (set, SET);
2173 else
2174 cost++;
2177 return cost;
2180 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2181 static rtx
2182 produce_memory_decl_rtl (tree obj, int *regno)
2184 rtx x;
2185 if (!obj)
2186 abort ();
2187 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2189 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2190 x = gen_rtx_SYMBOL_REF (Pmode, name);
2192 else
2193 x = gen_raw_REG (Pmode, (*regno)++);
2195 return gen_rtx_MEM (DECL_MODE (obj), x);
2198 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2199 walk_tree. DATA contains the actual fake register number. */
2201 static tree
2202 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2204 tree obj = NULL_TREE;
2205 rtx x = NULL_RTX;
2206 int *regno = data;
2208 switch (TREE_CODE (*expr_p))
2210 case ADDR_EXPR:
2211 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2212 handled_component_p (*expr_p);
2213 expr_p = &TREE_OPERAND (*expr_p, 0))
2214 continue;
2215 obj = *expr_p;
2216 if (DECL_P (obj))
2217 x = produce_memory_decl_rtl (obj, regno);
2218 break;
2220 case SSA_NAME:
2221 *ws = 0;
2222 obj = SSA_NAME_VAR (*expr_p);
2223 if (!DECL_RTL_SET_P (obj))
2224 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2225 break;
2227 case VAR_DECL:
2228 case PARM_DECL:
2229 case RESULT_DECL:
2230 *ws = 0;
2231 obj = *expr_p;
2233 if (DECL_RTL_SET_P (obj))
2234 break;
2236 if (DECL_MODE (obj) == BLKmode)
2237 x = produce_memory_decl_rtl (obj, regno);
2238 else
2239 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2241 break;
2243 default:
2244 break;
2247 if (x)
2249 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
2250 SET_DECL_RTL (obj, x);
2253 return NULL_TREE;
2256 /* Determines cost of the computation of EXPR. */
2258 static unsigned
2259 computation_cost (tree expr)
2261 rtx seq, rslt;
2262 tree type = TREE_TYPE (expr);
2263 unsigned cost;
2264 int regno = 0;
2266 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2267 start_sequence ();
2268 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2269 seq = get_insns ();
2270 end_sequence ();
2272 cost = seq_cost (seq);
2273 if (GET_CODE (rslt) == MEM)
2274 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2276 return cost;
2279 /* Returns variable containing the value of candidate CAND at statement AT. */
2281 static tree
2282 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2284 if (stmt_after_increment (loop, cand, stmt))
2285 return cand->var_after;
2286 else
2287 return cand->var_before;
2290 /* Determines the expression by that USE is expressed from induction variable
2291 CAND at statement AT in LOOP. */
2293 static tree
2294 get_computation_at (struct loop *loop,
2295 struct iv_use *use, struct iv_cand *cand, tree at)
2297 tree ubase = use->iv->base;
2298 tree ustep = use->iv->step;
2299 tree cbase = cand->iv->base;
2300 tree cstep = cand->iv->step;
2301 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2302 tree uutype;
2303 tree expr, delta;
2304 tree ratio;
2305 unsigned HOST_WIDE_INT ustepi, cstepi;
2306 HOST_WIDE_INT ratioi;
2308 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2310 /* We do not have a precision to express the values of use. */
2311 return NULL_TREE;
2314 expr = var_at_stmt (loop, cand, at);
2316 if (TREE_TYPE (expr) != ctype)
2318 /* This may happen with the original ivs. */
2319 expr = fold_convert (ctype, expr);
2322 if (TYPE_UNSIGNED (utype))
2323 uutype = utype;
2324 else
2326 uutype = unsigned_type_for (utype);
2327 ubase = fold_convert (uutype, ubase);
2328 ustep = fold_convert (uutype, ustep);
2331 if (uutype != ctype)
2333 expr = fold_convert (uutype, expr);
2334 cbase = fold_convert (uutype, cbase);
2335 cstep = fold_convert (uutype, cstep);
2338 if (!cst_and_fits_in_hwi (cstep)
2339 || !cst_and_fits_in_hwi (ustep))
2340 return NULL_TREE;
2342 ustepi = int_cst_value (ustep);
2343 cstepi = int_cst_value (cstep);
2345 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2347 /* TODO maybe consider case when ustep divides cstep and the ratio is
2348 a power of 2 (so that the division is fast to execute)? We would
2349 need to be much more careful with overflows etc. then. */
2350 return NULL_TREE;
2353 /* We may need to shift the value if we are after the increment. */
2354 if (stmt_after_increment (loop, cand, at))
2355 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2357 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2358 or |ratio| == 1, it is better to handle this like
2360 ubase - ratio * cbase + ratio * var. */
2362 if (ratioi == 1)
2364 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2365 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2367 else if (ratioi == -1)
2369 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2370 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2372 else if (TREE_CODE (cbase) == INTEGER_CST)
2374 ratio = build_int_cst_type (uutype, ratioi);
2375 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2376 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2377 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2378 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2380 else
2382 expr = fold (build2 (MINUS_EXPR, uutype, expr, cbase));
2383 ratio = build_int_cst_type (uutype, ratioi);
2384 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2385 expr = fold (build2 (PLUS_EXPR, uutype, ubase, expr));
2388 return fold_convert (utype, expr);
2391 /* Determines the expression by that USE is expressed from induction variable
2392 CAND in LOOP. */
2394 static tree
2395 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2397 return get_computation_at (loop, use, cand, use->stmt);
2400 /* Strips constant offsets from EXPR and adds them to OFFSET. */
2402 static void
2403 strip_offset (tree *expr, unsigned HOST_WIDE_INT *offset)
2405 tree op0, op1;
2406 enum tree_code code;
2408 while (1)
2410 if (cst_and_fits_in_hwi (*expr))
2412 *offset += int_cst_value (*expr);
2413 *expr = integer_zero_node;
2414 return;
2417 code = TREE_CODE (*expr);
2419 if (code != PLUS_EXPR && code != MINUS_EXPR)
2420 return;
2422 op0 = TREE_OPERAND (*expr, 0);
2423 op1 = TREE_OPERAND (*expr, 1);
2425 if (cst_and_fits_in_hwi (op1))
2427 if (code == PLUS_EXPR)
2428 *offset += int_cst_value (op1);
2429 else
2430 *offset -= int_cst_value (op1);
2432 *expr = op0;
2433 continue;
2436 if (code != PLUS_EXPR)
2437 return;
2439 if (!cst_and_fits_in_hwi (op0))
2440 return;
2442 *offset += int_cst_value (op0);
2443 *expr = op1;
2447 /* Returns cost of addition in MODE. */
2449 static unsigned
2450 add_cost (enum machine_mode mode)
2452 static unsigned costs[NUM_MACHINE_MODES];
2453 rtx seq;
2454 unsigned cost;
2456 if (costs[mode])
2457 return costs[mode];
2459 start_sequence ();
2460 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2461 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2462 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2463 NULL_RTX);
2464 seq = get_insns ();
2465 end_sequence ();
2467 cost = seq_cost (seq);
2468 if (!cost)
2469 cost = 1;
2471 costs[mode] = cost;
2473 if (dump_file && (dump_flags & TDF_DETAILS))
2474 fprintf (dump_file, "Addition in %s costs %d\n",
2475 GET_MODE_NAME (mode), cost);
2476 return cost;
2479 /* Entry in a hashtable of already known costs for multiplication. */
2480 struct mbc_entry
2482 HOST_WIDE_INT cst; /* The constant to multiply by. */
2483 enum machine_mode mode; /* In mode. */
2484 unsigned cost; /* The cost. */
2487 /* Counts hash value for the ENTRY. */
2489 static hashval_t
2490 mbc_entry_hash (const void *entry)
2492 const struct mbc_entry *e = entry;
2494 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2497 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2499 static int
2500 mbc_entry_eq (const void *entry1, const void *entry2)
2502 const struct mbc_entry *e1 = entry1;
2503 const struct mbc_entry *e2 = entry2;
2505 return (e1->mode == e2->mode
2506 && e1->cst == e2->cst);
2509 /* Returns cost of multiplication by constant CST in MODE. */
2511 static unsigned
2512 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2514 static htab_t costs;
2515 struct mbc_entry **cached, act;
2516 rtx seq;
2517 unsigned cost;
2519 if (!costs)
2520 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2522 act.mode = mode;
2523 act.cst = cst;
2524 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2525 if (*cached)
2526 return (*cached)->cost;
2528 *cached = xmalloc (sizeof (struct mbc_entry));
2529 (*cached)->mode = mode;
2530 (*cached)->cst = cst;
2532 start_sequence ();
2533 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2534 NULL_RTX, 0);
2535 seq = get_insns ();
2536 end_sequence ();
2538 cost = seq_cost (seq);
2540 if (dump_file && (dump_flags & TDF_DETAILS))
2541 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2542 (int) cst, GET_MODE_NAME (mode), cost);
2544 (*cached)->cost = cost;
2546 return cost;
2549 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2550 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2551 variable is omitted. The created memory accesses MODE.
2553 TODO -- there must be some better way. This all is quite crude. */
2555 static unsigned
2556 get_address_cost (bool symbol_present, bool var_present,
2557 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2559 #define MAX_RATIO 128
2560 static sbitmap valid_mult;
2561 static HOST_WIDE_INT rat, off;
2562 static HOST_WIDE_INT min_offset, max_offset;
2563 static unsigned costs[2][2][2][2];
2564 unsigned cost, acost;
2565 rtx seq, addr, base;
2566 bool offset_p, ratio_p;
2567 rtx reg1;
2568 HOST_WIDE_INT s_offset;
2569 unsigned HOST_WIDE_INT mask;
2570 unsigned bits;
2572 if (!valid_mult)
2574 HOST_WIDE_INT i;
2576 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2578 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2579 for (i = 1; i <= 1 << 20; i <<= 1)
2581 XEXP (addr, 1) = GEN_INT (i);
2582 if (!memory_address_p (Pmode, addr))
2583 break;
2585 max_offset = i >> 1;
2586 off = max_offset;
2588 for (i = 1; i <= 1 << 20; i <<= 1)
2590 XEXP (addr, 1) = GEN_INT (-i);
2591 if (!memory_address_p (Pmode, addr))
2592 break;
2594 min_offset = -(i >> 1);
2596 if (dump_file && (dump_flags & TDF_DETAILS))
2598 fprintf (dump_file, "get_address_cost:\n");
2599 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2600 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2603 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2604 sbitmap_zero (valid_mult);
2605 rat = 1;
2606 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2607 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2609 XEXP (addr, 1) = GEN_INT (i);
2610 if (memory_address_p (Pmode, addr))
2612 SET_BIT (valid_mult, i + MAX_RATIO);
2613 rat = i;
2617 if (dump_file && (dump_flags & TDF_DETAILS))
2619 fprintf (dump_file, " allowed multipliers:");
2620 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2621 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2622 fprintf (dump_file, " %d", (int) i);
2623 fprintf (dump_file, "\n");
2624 fprintf (dump_file, "\n");
2628 bits = GET_MODE_BITSIZE (Pmode);
2629 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2630 offset &= mask;
2631 if ((offset >> (bits - 1) & 1))
2632 offset |= ~mask;
2633 s_offset = offset;
2635 cost = 0;
2636 offset_p = (s_offset != 0
2637 && min_offset <= s_offset && s_offset <= max_offset);
2638 ratio_p = (ratio != 1
2639 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2640 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2642 if (ratio != 1 && !ratio_p)
2643 cost += multiply_by_cost (ratio, Pmode);
2645 if (s_offset && !offset_p && !symbol_present)
2647 cost += add_cost (Pmode);
2648 var_present = true;
2651 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2652 if (!acost)
2654 acost = 0;
2656 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2657 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2658 if (ratio_p)
2659 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2661 if (var_present)
2662 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2664 if (symbol_present)
2666 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2667 if (offset_p)
2668 base = gen_rtx_fmt_e (CONST, Pmode,
2669 gen_rtx_fmt_ee (PLUS, Pmode,
2670 base,
2671 GEN_INT (off)));
2673 else if (offset_p)
2674 base = GEN_INT (off);
2675 else
2676 base = NULL_RTX;
2678 if (base)
2679 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2681 start_sequence ();
2682 addr = memory_address (Pmode, addr);
2683 seq = get_insns ();
2684 end_sequence ();
2686 acost = seq_cost (seq);
2687 acost += address_cost (addr, Pmode);
2689 if (!acost)
2690 acost = 1;
2691 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2694 return cost + acost;
2697 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2698 the bitmap to that we should store it. */
2700 static struct ivopts_data *fd_ivopts_data;
2701 static tree
2702 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2704 bitmap *depends_on = data;
2705 struct version_info *info;
2707 if (TREE_CODE (*expr_p) != SSA_NAME)
2708 return NULL_TREE;
2709 info = name_info (fd_ivopts_data, *expr_p);
2711 if (!info->inv_id || info->has_nonlin_use)
2712 return NULL_TREE;
2714 if (!*depends_on)
2715 *depends_on = BITMAP_XMALLOC ();
2716 bitmap_set_bit (*depends_on, info->inv_id);
2718 return NULL_TREE;
2721 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
2722 invariants the computation depends on. */
2724 static unsigned
2725 force_var_cost (struct ivopts_data *data,
2726 tree expr, bitmap *depends_on)
2728 static bool costs_initialized = false;
2729 static unsigned integer_cost;
2730 static unsigned symbol_cost;
2731 static unsigned address_cost;
2732 tree op0, op1;
2733 unsigned cost0, cost1, cost;
2734 enum machine_mode mode;
2736 if (!costs_initialized)
2738 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2739 rtx x = gen_rtx_MEM (DECL_MODE (var),
2740 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2741 tree addr;
2742 tree type = build_pointer_type (integer_type_node);
2744 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2745 2000));
2747 SET_DECL_RTL (var, x);
2748 TREE_STATIC (var) = 1;
2749 addr = build1 (ADDR_EXPR, type, var);
2750 symbol_cost = computation_cost (addr) + 1;
2752 address_cost
2753 = computation_cost (build2 (PLUS_EXPR, type,
2754 addr,
2755 build_int_cst_type (type, 2000))) + 1;
2756 if (dump_file && (dump_flags & TDF_DETAILS))
2758 fprintf (dump_file, "force_var_cost:\n");
2759 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2760 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2761 fprintf (dump_file, " address %d\n", (int) address_cost);
2762 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2763 fprintf (dump_file, "\n");
2766 costs_initialized = true;
2769 if (depends_on)
2771 fd_ivopts_data = data;
2772 walk_tree (&expr, find_depends, depends_on, NULL);
2775 if (SSA_VAR_P (expr))
2776 return 0;
2778 if (TREE_INVARIANT (expr))
2780 if (TREE_CODE (expr) == INTEGER_CST)
2781 return integer_cost;
2783 if (TREE_CODE (expr) == ADDR_EXPR)
2785 tree obj = TREE_OPERAND (expr, 0);
2787 if (TREE_CODE (obj) == VAR_DECL
2788 || TREE_CODE (obj) == PARM_DECL
2789 || TREE_CODE (obj) == RESULT_DECL)
2790 return symbol_cost;
2793 return address_cost;
2796 switch (TREE_CODE (expr))
2798 case PLUS_EXPR:
2799 case MINUS_EXPR:
2800 case MULT_EXPR:
2801 op0 = TREE_OPERAND (expr, 0);
2802 op1 = TREE_OPERAND (expr, 1);
2804 if (is_gimple_val (op0))
2805 cost0 = 0;
2806 else
2807 cost0 = force_var_cost (data, op0, NULL);
2809 if (is_gimple_val (op1))
2810 cost1 = 0;
2811 else
2812 cost1 = force_var_cost (data, op1, NULL);
2814 break;
2816 default:
2817 /* Just an arbitrary value, FIXME. */
2818 return target_spill_cost;
2821 mode = TYPE_MODE (TREE_TYPE (expr));
2822 switch (TREE_CODE (expr))
2824 case PLUS_EXPR:
2825 case MINUS_EXPR:
2826 cost = add_cost (mode);
2827 break;
2829 case MULT_EXPR:
2830 if (cst_and_fits_in_hwi (op0))
2831 cost = multiply_by_cost (int_cst_value (op0), mode);
2832 else if (cst_and_fits_in_hwi (op1))
2833 cost = multiply_by_cost (int_cst_value (op1), mode);
2834 else
2835 return target_spill_cost;
2836 break;
2838 default:
2839 gcc_unreachable ();
2842 cost += cost0;
2843 cost += cost1;
2845 /* Bound the cost by target_spill_cost. The parts of complicated
2846 computations often are either loop invariant or at least can
2847 be shared between several iv uses, so letting this grow without
2848 limits would not give reasonable results. */
2849 return cost < target_spill_cost ? cost : target_spill_cost;
2852 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2853 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2854 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2855 invariants the computation depends on. */
2857 static unsigned
2858 split_address_cost (struct ivopts_data *data,
2859 tree addr, bool *symbol_present, bool *var_present,
2860 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2862 tree core;
2863 HOST_WIDE_INT bitsize;
2864 HOST_WIDE_INT bitpos;
2865 tree toffset;
2866 enum machine_mode mode;
2867 int unsignedp, volatilep;
2869 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
2870 &unsignedp, &volatilep, false);
2872 if (toffset != 0
2873 || bitpos % BITS_PER_UNIT != 0
2874 || TREE_CODE (core) != VAR_DECL)
2876 *symbol_present = false;
2877 *var_present = true;
2878 fd_ivopts_data = data;
2879 walk_tree (&addr, find_depends, depends_on, NULL);
2880 return target_spill_cost;
2883 *offset += bitpos / BITS_PER_UNIT;
2884 if (TREE_STATIC (core)
2885 || DECL_EXTERNAL (core))
2887 *symbol_present = true;
2888 *var_present = false;
2889 return 0;
2892 *symbol_present = false;
2893 *var_present = true;
2894 return 0;
2897 /* Estimates cost of expressing difference of addresses E1 - E2 as
2898 var + symbol + offset. The value of offset is added to OFFSET,
2899 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2900 part is missing. DEPENDS_ON is a set of the invariants the computation
2901 depends on. */
2903 static unsigned
2904 ptr_difference_cost (struct ivopts_data *data,
2905 tree e1, tree e2, bool *symbol_present, bool *var_present,
2906 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2908 HOST_WIDE_INT diff = 0;
2909 unsigned cost;
2911 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
2913 if (ptr_difference_const (e1, e2, &diff))
2915 *offset += diff;
2916 *symbol_present = false;
2917 *var_present = false;
2918 return 0;
2921 if (e2 == integer_zero_node)
2922 return split_address_cost (data, TREE_OPERAND (e1, 0),
2923 symbol_present, var_present, offset, depends_on);
2925 *symbol_present = false;
2926 *var_present = true;
2928 cost = force_var_cost (data, e1, depends_on);
2929 cost += force_var_cost (data, e2, depends_on);
2930 cost += add_cost (Pmode);
2932 return cost;
2935 /* Estimates cost of expressing difference E1 - E2 as
2936 var + symbol + offset. The value of offset is added to OFFSET,
2937 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2938 part is missing. DEPENDS_ON is a set of the invariants the computation
2939 depends on. */
2941 static unsigned
2942 difference_cost (struct ivopts_data *data,
2943 tree e1, tree e2, bool *symbol_present, bool *var_present,
2944 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2946 unsigned cost;
2947 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
2949 strip_offset (&e1, offset);
2950 *offset = -*offset;
2951 strip_offset (&e2, offset);
2952 *offset = -*offset;
2954 if (TREE_CODE (e1) == ADDR_EXPR)
2955 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
2956 depends_on);
2957 *symbol_present = false;
2959 if (operand_equal_p (e1, e2, 0))
2961 *var_present = false;
2962 return 0;
2964 *var_present = true;
2965 if (zero_p (e2))
2966 return force_var_cost (data, e1, depends_on);
2968 if (zero_p (e1))
2970 cost = force_var_cost (data, e2, depends_on);
2971 cost += multiply_by_cost (-1, mode);
2973 return cost;
2976 cost = force_var_cost (data, e1, depends_on);
2977 cost += force_var_cost (data, e2, depends_on);
2978 cost += add_cost (mode);
2980 return cost;
2983 /* Determines the cost of the computation by that USE is expressed
2984 from induction variable CAND. If ADDRESS_P is true, we just need
2985 to create an address from it, otherwise we want to get it into
2986 register. A set of invariants we depend on is stored in
2987 DEPENDS_ON. AT is the statement at that the value is computed. */
2989 static unsigned
2990 get_computation_cost_at (struct ivopts_data *data,
2991 struct iv_use *use, struct iv_cand *cand,
2992 bool address_p, bitmap *depends_on, tree at)
2994 tree ubase = use->iv->base, ustep = use->iv->step;
2995 tree cbase, cstep;
2996 tree utype = TREE_TYPE (ubase), ctype;
2997 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
2998 HOST_WIDE_INT ratio, aratio;
2999 bool var_present, symbol_present;
3000 unsigned cost = 0, n_sums;
3002 *depends_on = NULL;
3004 /* Only consider real candidates. */
3005 if (!cand->iv)
3006 return INFTY;
3008 cbase = cand->iv->base;
3009 cstep = cand->iv->step;
3010 ctype = TREE_TYPE (cbase);
3012 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3014 /* We do not have a precision to express the values of use. */
3015 return INFTY;
3018 if (address_p)
3020 /* Do not try to express address of an object with computation based
3021 on address of a different object. This may cause problems in rtl
3022 level alias analysis (that does not expect this to be happening,
3023 as this is illegal in C), and would be unlikely to be useful
3024 anyway. */
3025 if (use->iv->base_object
3026 && cand->iv->base_object
3027 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3028 return INFTY;
3031 if (!cst_and_fits_in_hwi (ustep)
3032 || !cst_and_fits_in_hwi (cstep))
3033 return INFTY;
3035 if (TREE_CODE (ubase) == INTEGER_CST
3036 && !cst_and_fits_in_hwi (ubase))
3037 goto fallback;
3039 if (TREE_CODE (cbase) == INTEGER_CST
3040 && !cst_and_fits_in_hwi (cbase))
3041 goto fallback;
3043 ustepi = int_cst_value (ustep);
3044 cstepi = int_cst_value (cstep);
3046 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3048 /* TODO -- add direct handling of this case. */
3049 goto fallback;
3052 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3053 return INFTY;
3055 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3056 or ratio == 1, it is better to handle this like
3058 ubase - ratio * cbase + ratio * var
3060 (also holds in the case ratio == -1, TODO. */
3062 if (TREE_CODE (cbase) == INTEGER_CST)
3064 offset = - ratio * int_cst_value (cbase);
3065 cost += difference_cost (data,
3066 ubase, integer_zero_node,
3067 &symbol_present, &var_present, &offset,
3068 depends_on);
3070 else if (ratio == 1)
3072 cost += difference_cost (data,
3073 ubase, cbase,
3074 &symbol_present, &var_present, &offset,
3075 depends_on);
3077 else
3079 cost += force_var_cost (data, cbase, depends_on);
3080 cost += add_cost (TYPE_MODE (ctype));
3081 cost += difference_cost (data,
3082 ubase, integer_zero_node,
3083 &symbol_present, &var_present, &offset,
3084 depends_on);
3087 /* If we are after the increment, the value of the candidate is higher by
3088 one iteration. */
3089 if (stmt_after_increment (data->current_loop, cand, at))
3090 offset -= ratio * cstepi;
3092 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3093 (symbol/var/const parts may be omitted). If we are looking for an address,
3094 find the cost of addressing this. */
3095 if (address_p)
3096 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3098 /* Otherwise estimate the costs for computing the expression. */
3099 aratio = ratio > 0 ? ratio : -ratio;
3100 if (!symbol_present && !var_present && !offset)
3102 if (ratio != 1)
3103 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3105 return cost;
3108 if (aratio != 1)
3109 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3111 n_sums = 1;
3112 if (var_present
3113 /* Symbol + offset should be compile-time computable. */
3114 && (symbol_present || offset))
3115 n_sums++;
3117 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3119 fallback:
3121 /* Just get the expression, expand it and measure the cost. */
3122 tree comp = get_computation_at (data->current_loop, use, cand, at);
3124 if (!comp)
3125 return INFTY;
3127 if (address_p)
3128 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3130 return computation_cost (comp);
3134 /* Determines the cost of the computation by that USE is expressed
3135 from induction variable CAND. If ADDRESS_P is true, we just need
3136 to create an address from it, otherwise we want to get it into
3137 register. A set of invariants we depend on is stored in
3138 DEPENDS_ON. */
3140 static unsigned
3141 get_computation_cost (struct ivopts_data *data,
3142 struct iv_use *use, struct iv_cand *cand,
3143 bool address_p, bitmap *depends_on)
3145 return get_computation_cost_at (data,
3146 use, cand, address_p, depends_on, use->stmt);
3149 /* Determines cost of basing replacement of USE on CAND in a generic
3150 expression. */
3152 static bool
3153 determine_use_iv_cost_generic (struct ivopts_data *data,
3154 struct iv_use *use, struct iv_cand *cand)
3156 bitmap depends_on;
3157 unsigned cost;
3159 /* The simple case first -- if we need to express value of the preserved
3160 original biv, the cost is 0. This also prevents us from counting the
3161 cost of increment twice -- once at this use and once in the cost of
3162 the candidate. */
3163 if (cand->pos == IP_ORIGINAL
3164 && cand->incremented_at == use->stmt)
3166 set_use_iv_cost (data, use, cand, 0, NULL);
3167 return true;
3170 cost = get_computation_cost (data, use, cand, false, &depends_on);
3171 set_use_iv_cost (data, use, cand, cost, depends_on);
3173 return cost != INFTY;
3176 /* Determines cost of basing replacement of USE on CAND in an address. */
3178 static bool
3179 determine_use_iv_cost_address (struct ivopts_data *data,
3180 struct iv_use *use, struct iv_cand *cand)
3182 bitmap depends_on;
3183 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3185 set_use_iv_cost (data, use, cand, cost, depends_on);
3187 return cost != INFTY;
3190 /* Computes value of induction variable IV in iteration NITER. */
3192 static tree
3193 iv_value (struct iv *iv, tree niter)
3195 tree val;
3196 tree type = TREE_TYPE (iv->base);
3198 niter = fold_convert (type, niter);
3199 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3201 return fold (build2 (PLUS_EXPR, type, iv->base, val));
3204 /* Computes value of candidate CAND at position AT in iteration NITER. */
3206 static tree
3207 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3209 tree val = iv_value (cand->iv, niter);
3210 tree type = TREE_TYPE (cand->iv->base);
3212 if (stmt_after_increment (loop, cand, at))
3213 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3215 return val;
3218 /* Check whether it is possible to express the condition in USE by comparison
3219 of candidate CAND. If so, store the comparison code to COMPARE and the
3220 value compared with to BOUND. */
3222 static bool
3223 may_eliminate_iv (struct loop *loop,
3224 struct iv_use *use, struct iv_cand *cand,
3225 enum tree_code *compare, tree *bound)
3227 basic_block ex_bb;
3228 edge exit;
3229 struct tree_niter_desc niter, new_niter;
3230 tree wider_type, type, base;
3232 /* For now works only for exits that dominate the loop latch. TODO -- extend
3233 for other conditions inside loop body. */
3234 ex_bb = bb_for_stmt (use->stmt);
3235 if (use->stmt != last_stmt (ex_bb)
3236 || TREE_CODE (use->stmt) != COND_EXPR)
3237 return false;
3238 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3239 return false;
3241 exit = EDGE_SUCC (ex_bb, 0);
3242 if (flow_bb_inside_loop_p (loop, exit->dest))
3243 exit = EDGE_SUCC (ex_bb, 1);
3244 if (flow_bb_inside_loop_p (loop, exit->dest))
3245 return false;
3247 niter.niter = NULL_TREE;
3248 number_of_iterations_exit (loop, exit, &niter);
3249 if (!niter.niter
3250 || !integer_nonzerop (niter.assumptions)
3251 || !integer_zerop (niter.may_be_zero))
3252 return false;
3254 if (exit->flags & EDGE_TRUE_VALUE)
3255 *compare = EQ_EXPR;
3256 else
3257 *compare = NE_EXPR;
3259 *bound = cand_value_at (loop, cand, use->stmt, niter.niter);
3261 /* Let us check there is not some problem with overflows, by checking that
3262 the number of iterations is unchanged. */
3263 base = cand->iv->base;
3264 type = TREE_TYPE (base);
3265 if (stmt_after_increment (loop, cand, use->stmt))
3266 base = fold (build2 (PLUS_EXPR, type, base, cand->iv->step));
3268 new_niter.niter = NULL_TREE;
3269 number_of_iterations_cond (TREE_TYPE (cand->iv->base), base,
3270 cand->iv->step, NE_EXPR, *bound, NULL_TREE,
3271 &new_niter);
3272 if (!new_niter.niter
3273 || !integer_nonzerop (new_niter.assumptions)
3274 || !integer_zerop (new_niter.may_be_zero))
3275 return false;
3277 wider_type = TREE_TYPE (new_niter.niter);
3278 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (TREE_TYPE (niter.niter)))
3279 wider_type = TREE_TYPE (niter.niter);
3280 if (!operand_equal_p (fold_convert (wider_type, niter.niter),
3281 fold_convert (wider_type, new_niter.niter), 0))
3282 return false;
3284 return true;
3287 /* Determines cost of basing replacement of USE on CAND in a condition. */
3289 static bool
3290 determine_use_iv_cost_condition (struct ivopts_data *data,
3291 struct iv_use *use, struct iv_cand *cand)
3293 tree bound;
3294 enum tree_code compare;
3296 /* Only consider real candidates. */
3297 if (!cand->iv)
3299 set_use_iv_cost (data, use, cand, INFTY, NULL);
3300 return false;
3303 if (may_eliminate_iv (data->current_loop, use, cand, &compare, &bound))
3305 bitmap depends_on = NULL;
3306 unsigned cost = force_var_cost (data, bound, &depends_on);
3308 set_use_iv_cost (data, use, cand, cost, depends_on);
3309 return cost != INFTY;
3312 /* The induction variable elimination failed; just express the original
3313 giv. If it is compared with an invariant, note that we cannot get
3314 rid of it. */
3315 if (TREE_CODE (*use->op_p) == SSA_NAME)
3316 record_invariant (data, *use->op_p, true);
3317 else
3319 record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
3320 record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
3323 return determine_use_iv_cost_generic (data, use, cand);
3326 /* Checks whether it is possible to replace the final value of USE by
3327 a direct computation. If so, the formula is stored to *VALUE. */
3329 static bool
3330 may_replace_final_value (struct loop *loop, struct iv_use *use, tree *value)
3332 edge exit;
3333 struct tree_niter_desc *niter;
3335 exit = single_dom_exit (loop);
3336 if (!exit)
3337 return false;
3339 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3340 bb_for_stmt (use->stmt)));
3342 niter = &loop_data (loop)->niter;
3343 if (!niter->niter
3344 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
3345 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
3346 return false;
3348 *value = iv_value (use->iv, niter->niter);
3350 return true;
3353 /* Determines cost of replacing final value of USE using CAND. */
3355 static bool
3356 determine_use_iv_cost_outer (struct ivopts_data *data,
3357 struct iv_use *use, struct iv_cand *cand)
3359 bitmap depends_on;
3360 unsigned cost;
3361 edge exit;
3362 tree value;
3363 struct loop *loop = data->current_loop;
3365 /* The simple case first -- if we need to express value of the preserved
3366 original biv, the cost is 0. This also prevents us from counting the
3367 cost of increment twice -- once at this use and once in the cost of
3368 the candidate. */
3369 if (cand->pos == IP_ORIGINAL
3370 && cand->incremented_at == use->stmt)
3372 set_use_iv_cost (data, use, cand, 0, NULL);
3373 return true;
3376 if (!cand->iv)
3378 if (!may_replace_final_value (loop, use, &value))
3380 set_use_iv_cost (data, use, cand, INFTY, NULL);
3381 return false;
3384 depends_on = NULL;
3385 cost = force_var_cost (data, value, &depends_on);
3387 cost /= AVG_LOOP_NITER (loop);
3389 set_use_iv_cost (data, use, cand, cost, depends_on);
3390 return cost != INFTY;
3393 exit = single_dom_exit (loop);
3394 if (exit)
3396 /* If there is just a single exit, we may use value of the candidate
3397 after we take it to determine the value of use. */
3398 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3399 last_stmt (exit->src));
3400 if (cost != INFTY)
3401 cost /= AVG_LOOP_NITER (loop);
3403 else
3405 /* Otherwise we just need to compute the iv. */
3406 cost = get_computation_cost (data, use, cand, false, &depends_on);
3409 set_use_iv_cost (data, use, cand, cost, depends_on);
3411 return cost != INFTY;
3414 /* Determines cost of basing replacement of USE on CAND. Returns false
3415 if USE cannot be based on CAND. */
3417 static bool
3418 determine_use_iv_cost (struct ivopts_data *data,
3419 struct iv_use *use, struct iv_cand *cand)
3421 switch (use->type)
3423 case USE_NONLINEAR_EXPR:
3424 return determine_use_iv_cost_generic (data, use, cand);
3426 case USE_OUTER:
3427 return determine_use_iv_cost_outer (data, use, cand);
3429 case USE_ADDRESS:
3430 return determine_use_iv_cost_address (data, use, cand);
3432 case USE_COMPARE:
3433 return determine_use_iv_cost_condition (data, use, cand);
3435 default:
3436 gcc_unreachable ();
3440 /* Determines costs of basing the use of the iv on an iv candidate. */
3442 static void
3443 determine_use_iv_costs (struct ivopts_data *data)
3445 unsigned i, j;
3446 struct iv_use *use;
3447 struct iv_cand *cand;
3448 bitmap to_clear = BITMAP_XMALLOC ();
3450 alloc_use_cost_map (data);
3452 for (i = 0; i < n_iv_uses (data); i++)
3454 use = iv_use (data, i);
3456 if (data->consider_all_candidates)
3458 for (j = 0; j < n_iv_cands (data); j++)
3460 cand = iv_cand (data, j);
3461 determine_use_iv_cost (data, use, cand);
3464 else
3466 bitmap_iterator bi;
3468 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3470 cand = iv_cand (data, j);
3471 if (!determine_use_iv_cost (data, use, cand))
3472 bitmap_set_bit (to_clear, j);
3475 /* Remove the candidates for that the cost is infinite from
3476 the list of related candidates. */
3477 bitmap_and_compl_into (use->related_cands, to_clear);
3478 bitmap_clear (to_clear);
3482 BITMAP_XFREE (to_clear);
3484 if (dump_file && (dump_flags & TDF_DETAILS))
3486 fprintf (dump_file, "Use-candidate costs:\n");
3488 for (i = 0; i < n_iv_uses (data); i++)
3490 use = iv_use (data, i);
3492 fprintf (dump_file, "Use %d:\n", i);
3493 fprintf (dump_file, " cand\tcost\tdepends on\n");
3494 for (j = 0; j < use->n_map_members; j++)
3496 if (!use->cost_map[j].cand
3497 || use->cost_map[j].cost == INFTY)
3498 continue;
3500 fprintf (dump_file, " %d\t%d\t",
3501 use->cost_map[j].cand->id,
3502 use->cost_map[j].cost);
3503 if (use->cost_map[j].depends_on)
3504 bitmap_print (dump_file,
3505 use->cost_map[j].depends_on, "","");
3506 fprintf (dump_file, "\n");
3509 fprintf (dump_file, "\n");
3511 fprintf (dump_file, "\n");
3515 /* Determines cost of the candidate CAND. */
3517 static void
3518 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3520 unsigned cost_base, cost_step;
3521 tree base, last;
3522 basic_block bb;
3524 if (!cand->iv)
3526 cand->cost = 0;
3527 return;
3530 /* There are two costs associated with the candidate -- its increment
3531 and its initialization. The second is almost negligible for any loop
3532 that rolls enough, so we take it just very little into account. */
3534 base = cand->iv->base;
3535 cost_base = force_var_cost (data, base, NULL);
3536 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3538 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3540 /* Prefer the original iv unless we may gain something by replacing it. */
3541 if (cand->pos == IP_ORIGINAL)
3542 cand->cost--;
3544 /* Prefer not to insert statements into latch unless there are some
3545 already (so that we do not create unnecessary jumps). */
3546 if (cand->pos == IP_END)
3548 bb = ip_end_pos (data->current_loop);
3549 last = last_stmt (bb);
3551 if (!last
3552 || TREE_CODE (last) == LABEL_EXPR)
3553 cand->cost++;
3557 /* Determines costs of computation of the candidates. */
3559 static void
3560 determine_iv_costs (struct ivopts_data *data)
3562 unsigned i;
3564 if (dump_file && (dump_flags & TDF_DETAILS))
3566 fprintf (dump_file, "Candidate costs:\n");
3567 fprintf (dump_file, " cand\tcost\n");
3570 for (i = 0; i < n_iv_cands (data); i++)
3572 struct iv_cand *cand = iv_cand (data, i);
3574 determine_iv_cost (data, cand);
3576 if (dump_file && (dump_flags & TDF_DETAILS))
3577 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3580 if (dump_file && (dump_flags & TDF_DETAILS))
3581 fprintf (dump_file, "\n");
3584 /* Calculates cost for having SIZE induction variables. */
3586 static unsigned
3587 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3589 return global_cost_for_size (size,
3590 loop_data (data->current_loop)->regs_used,
3591 n_iv_uses (data));
3594 /* For each size of the induction variable set determine the penalty. */
3596 static void
3597 determine_set_costs (struct ivopts_data *data)
3599 unsigned j, n;
3600 tree phi, op;
3601 struct loop *loop = data->current_loop;
3602 bitmap_iterator bi;
3604 /* We use the following model (definitely improvable, especially the
3605 cost function -- TODO):
3607 We estimate the number of registers available (using MD data), name it A.
3609 We estimate the number of registers used by the loop, name it U. This
3610 number is obtained as the number of loop phi nodes (not counting virtual
3611 registers and bivs) + the number of variables from outside of the loop.
3613 We set a reserve R (free regs that are used for temporary computations,
3614 etc.). For now the reserve is a constant 3.
3616 Let I be the number of induction variables.
3618 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3619 make a lot of ivs without a reason).
3620 -- if A - R < U + I <= A, the cost is I * PRES_COST
3621 -- if U + I > A, the cost is I * PRES_COST and
3622 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3624 if (dump_file && (dump_flags & TDF_DETAILS))
3626 fprintf (dump_file, "Global costs:\n");
3627 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3628 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3629 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3630 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3633 n = 0;
3634 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3636 op = PHI_RESULT (phi);
3638 if (!is_gimple_reg (op))
3639 continue;
3641 if (get_iv (data, op))
3642 continue;
3644 n++;
3647 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3649 struct version_info *info = ver_info (data, j);
3651 if (info->inv_id && info->has_nonlin_use)
3652 n++;
3655 loop_data (loop)->regs_used = n;
3656 if (dump_file && (dump_flags & TDF_DETAILS))
3657 fprintf (dump_file, " regs_used %d\n", n);
3659 if (dump_file && (dump_flags & TDF_DETAILS))
3661 fprintf (dump_file, " cost for size:\n");
3662 fprintf (dump_file, " ivs\tcost\n");
3663 for (j = 0; j <= 2 * target_avail_regs; j++)
3664 fprintf (dump_file, " %d\t%d\n", j,
3665 ivopts_global_cost_for_size (data, j));
3666 fprintf (dump_file, "\n");
3670 /* Returns true if A is a cheaper cost pair than B. */
3672 static bool
3673 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3675 if (!a)
3676 return false;
3678 if (!b)
3679 return true;
3681 if (a->cost < b->cost)
3682 return true;
3684 if (a->cost > b->cost)
3685 return false;
3687 /* In case the costs are the same, prefer the cheaper candidate. */
3688 if (a->cand->cost < b->cand->cost)
3689 return true;
3691 return false;
3694 /* Computes the cost field of IVS structure. */
3696 static void
3697 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
3699 unsigned cost = 0;
3701 cost += ivs->cand_use_cost;
3702 cost += ivs->cand_cost;
3703 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
3705 ivs->cost = cost;
3708 /* Set USE not to be expressed by any candidate in IVS. */
3710 static void
3711 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
3712 struct iv_use *use)
3714 unsigned uid = use->id, cid, iid;
3715 bitmap deps;
3716 struct cost_pair *cp;
3717 bitmap_iterator bi;
3719 cp = ivs->cand_for_use[uid];
3720 if (!cp)
3721 return;
3722 cid = cp->cand->id;
3724 ivs->bad_uses++;
3725 ivs->cand_for_use[uid] = NULL;
3726 ivs->n_cand_uses[cid]--;
3728 if (ivs->n_cand_uses[cid] == 0)
3730 bitmap_clear_bit (ivs->cands, cid);
3731 /* Do not count the pseudocandidates. */
3732 if (cp->cand->iv)
3733 ivs->n_regs--;
3734 ivs->n_cands--;
3735 ivs->cand_cost -= cp->cand->cost;
3738 ivs->cand_use_cost -= cp->cost;
3740 deps = cp->depends_on;
3742 if (deps)
3744 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3746 ivs->n_invariant_uses[iid]--;
3747 if (ivs->n_invariant_uses[iid] == 0)
3748 ivs->n_regs--;
3752 iv_ca_recount_cost (data, ivs);
3755 /* Set cost pair for USE in set IVS to CP. */
3757 static void
3758 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
3759 struct iv_use *use, struct cost_pair *cp)
3761 unsigned uid = use->id, cid, iid;
3762 bitmap deps;
3763 bitmap_iterator bi;
3765 if (ivs->cand_for_use[uid] == cp)
3766 return;
3768 if (ivs->cand_for_use[uid])
3769 iv_ca_set_no_cp (data, ivs, use);
3771 if (cp)
3773 cid = cp->cand->id;
3775 ivs->bad_uses--;
3776 ivs->cand_for_use[uid] = cp;
3777 ivs->n_cand_uses[cid]++;
3778 if (ivs->n_cand_uses[cid] == 1)
3780 bitmap_set_bit (ivs->cands, cid);
3781 /* Do not count the pseudocandidates. */
3782 if (cp->cand->iv)
3783 ivs->n_regs++;
3784 ivs->n_cands++;
3785 ivs->cand_cost += cp->cand->cost;
3788 ivs->cand_use_cost += cp->cost;
3790 deps = cp->depends_on;
3792 if (deps)
3794 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3796 ivs->n_invariant_uses[iid]++;
3797 if (ivs->n_invariant_uses[iid] == 1)
3798 ivs->n_regs++;
3802 iv_ca_recount_cost (data, ivs);
3806 /* Extend set IVS by expressing USE by some of the candidates in it
3807 if possible. */
3809 static void
3810 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
3811 struct iv_use *use)
3813 struct cost_pair *best_cp = NULL, *cp;
3814 bitmap_iterator bi;
3815 unsigned i;
3817 gcc_assert (ivs->upto >= use->id);
3819 if (ivs->upto == use->id)
3821 ivs->upto++;
3822 ivs->bad_uses++;
3825 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
3827 cp = get_use_iv_cost (data, use, iv_cand (data, i));
3829 if (cheaper_cost_pair (cp, best_cp))
3830 best_cp = cp;
3833 iv_ca_set_cp (data, ivs, use, best_cp);
3836 /* Get cost for assignment IVS. */
3838 static unsigned
3839 iv_ca_cost (struct iv_ca *ivs)
3841 return (ivs->bad_uses ? INFTY : ivs->cost);
3844 /* Returns true if all dependences of CP are among invariants in IVS. */
3846 static bool
3847 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
3849 unsigned i;
3850 bitmap_iterator bi;
3852 if (!cp->depends_on)
3853 return true;
3855 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
3857 if (ivs->n_invariant_uses[i] == 0)
3858 return false;
3861 return true;
3864 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
3865 it before NEXT_CHANGE. */
3867 static struct iv_ca_delta *
3868 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
3869 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
3871 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
3873 change->use = use;
3874 change->old_cp = old_cp;
3875 change->new_cp = new_cp;
3876 change->next_change = next_change;
3878 return change;
3881 /* Joins two lists of changes L1 and L2. Destructive -- old lists
3882 are rewritten. */
3884 static struct iv_ca_delta *
3885 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
3887 struct iv_ca_delta *last;
3889 if (!l2)
3890 return l1;
3892 if (!l1)
3893 return l2;
3895 for (last = l1; last->next_change; last = last->next_change)
3896 continue;
3897 last->next_change = l2;
3899 return l1;
3902 /* Returns candidate by that USE is expressed in IVS. */
3904 static struct cost_pair *
3905 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
3907 return ivs->cand_for_use[use->id];
3910 /* Reverse the list of changes DELTA, forming the inverse to it. */
3912 static struct iv_ca_delta *
3913 iv_ca_delta_reverse (struct iv_ca_delta *delta)
3915 struct iv_ca_delta *act, *next, *prev = NULL;
3916 struct cost_pair *tmp;
3918 for (act = delta; act; act = next)
3920 next = act->next_change;
3921 act->next_change = prev;
3922 prev = act;
3924 tmp = act->old_cp;
3925 act->old_cp = act->new_cp;
3926 act->new_cp = tmp;
3929 return prev;
3932 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
3933 reverted instead. */
3935 static void
3936 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
3937 struct iv_ca_delta *delta, bool forward)
3939 struct cost_pair *from, *to;
3940 struct iv_ca_delta *act;
3942 if (!forward)
3943 delta = iv_ca_delta_reverse (delta);
3945 for (act = delta; act; act = act->next_change)
3947 from = act->old_cp;
3948 to = act->new_cp;
3949 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
3950 iv_ca_set_cp (data, ivs, act->use, to);
3953 if (!forward)
3954 iv_ca_delta_reverse (delta);
3957 /* Returns true if CAND is used in IVS. */
3959 static bool
3960 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
3962 return ivs->n_cand_uses[cand->id] > 0;
3965 /* Returns number of induction variable candidates in the set IVS. */
3967 static unsigned
3968 iv_ca_n_cands (struct iv_ca *ivs)
3970 return ivs->n_cands;
3973 /* Free the list of changes DELTA. */
3975 static void
3976 iv_ca_delta_free (struct iv_ca_delta **delta)
3978 struct iv_ca_delta *act, *next;
3980 for (act = *delta; act; act = next)
3982 next = act->next_change;
3983 free (act);
3986 *delta = NULL;
3989 /* Allocates new iv candidates assignment. */
3991 static struct iv_ca *
3992 iv_ca_new (struct ivopts_data *data)
3994 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
3996 nw->upto = 0;
3997 nw->bad_uses = 0;
3998 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
3999 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4000 nw->cands = BITMAP_XMALLOC ();
4001 nw->n_cands = 0;
4002 nw->n_regs = 0;
4003 nw->cand_use_cost = 0;
4004 nw->cand_cost = 0;
4005 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4006 nw->cost = 0;
4008 return nw;
4011 /* Free memory occupied by the set IVS. */
4013 static void
4014 iv_ca_free (struct iv_ca **ivs)
4016 free ((*ivs)->cand_for_use);
4017 free ((*ivs)->n_cand_uses);
4018 BITMAP_XFREE ((*ivs)->cands);
4019 free ((*ivs)->n_invariant_uses);
4020 free (*ivs);
4021 *ivs = NULL;
4024 /* Dumps IVS to FILE. */
4026 static void
4027 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4029 const char *pref = " invariants ";
4030 unsigned i;
4032 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4033 bitmap_print (file, ivs->cands, " candidates ","\n");
4035 for (i = 1; i <= data->max_inv_id; i++)
4036 if (ivs->n_invariant_uses[i])
4038 fprintf (file, "%s%d", pref, i);
4039 pref = ", ";
4041 fprintf (file, "\n");
4044 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4045 new set, and store differences in DELTA. Number of induction variables
4046 in the new set is stored to N_IVS. */
4048 static unsigned
4049 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4050 struct iv_cand *cand, struct iv_ca_delta **delta,
4051 unsigned *n_ivs)
4053 unsigned i, cost;
4054 struct iv_use *use;
4055 struct cost_pair *old_cp, *new_cp;
4057 *delta = NULL;
4058 for (i = 0; i < ivs->upto; i++)
4060 use = iv_use (data, i);
4061 old_cp = iv_ca_cand_for_use (ivs, use);
4063 if (old_cp
4064 && old_cp->cand == cand)
4065 continue;
4067 new_cp = get_use_iv_cost (data, use, cand);
4068 if (!new_cp)
4069 continue;
4071 if (!iv_ca_has_deps (ivs, new_cp))
4072 continue;
4074 if (!cheaper_cost_pair (new_cp, old_cp))
4075 continue;
4077 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4080 iv_ca_delta_commit (data, ivs, *delta, true);
4081 cost = iv_ca_cost (ivs);
4082 if (n_ivs)
4083 *n_ivs = iv_ca_n_cands (ivs);
4084 iv_ca_delta_commit (data, ivs, *delta, false);
4086 return cost;
4089 /* Try narrowing set IVS by removing CAND. Return the cost of
4090 the new set and store the differences in DELTA. */
4092 static unsigned
4093 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4094 struct iv_cand *cand, struct iv_ca_delta **delta)
4096 unsigned i, ci;
4097 struct iv_use *use;
4098 struct cost_pair *old_cp, *new_cp, *cp;
4099 bitmap_iterator bi;
4100 struct iv_cand *cnd;
4101 unsigned cost;
4103 *delta = NULL;
4104 for (i = 0; i < n_iv_uses (data); i++)
4106 use = iv_use (data, i);
4108 old_cp = iv_ca_cand_for_use (ivs, use);
4109 if (old_cp->cand != cand)
4110 continue;
4112 new_cp = NULL;
4114 if (data->consider_all_candidates)
4116 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4118 if (ci == cand->id)
4119 continue;
4121 cnd = iv_cand (data, ci);
4123 cp = get_use_iv_cost (data, use, cnd);
4124 if (!cp)
4125 continue;
4126 if (!iv_ca_has_deps (ivs, cp))
4127 continue;
4129 if (!cheaper_cost_pair (cp, new_cp))
4130 continue;
4132 new_cp = cp;
4135 else
4137 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4139 if (ci == cand->id)
4140 continue;
4142 cnd = iv_cand (data, ci);
4144 cp = get_use_iv_cost (data, use, cnd);
4145 if (!cp)
4146 continue;
4147 if (!iv_ca_has_deps (ivs, cp))
4148 continue;
4150 if (!cheaper_cost_pair (cp, new_cp))
4151 continue;
4153 new_cp = cp;
4157 if (!new_cp)
4159 iv_ca_delta_free (delta);
4160 return INFTY;
4163 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4166 iv_ca_delta_commit (data, ivs, *delta, true);
4167 cost = iv_ca_cost (ivs);
4168 iv_ca_delta_commit (data, ivs, *delta, false);
4170 return cost;
4173 /* Try optimizing the set of candidates IVS by removing candidates different
4174 from to EXCEPT_CAND from it. Return cost of the new set, and store
4175 differences in DELTA. */
4177 static unsigned
4178 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4179 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4181 bitmap_iterator bi;
4182 struct iv_ca_delta *act_delta, *best_delta;
4183 unsigned i, best_cost, acost;
4184 struct iv_cand *cand;
4186 best_delta = NULL;
4187 best_cost = iv_ca_cost (ivs);
4189 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4191 cand = iv_cand (data, i);
4193 if (cand == except_cand)
4194 continue;
4196 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4198 if (acost < best_cost)
4200 best_cost = acost;
4201 iv_ca_delta_free (&best_delta);
4202 best_delta = act_delta;
4204 else
4205 iv_ca_delta_free (&act_delta);
4208 if (!best_delta)
4210 *delta = NULL;
4211 return best_cost;
4214 /* Recurse to possibly remove other unnecessary ivs. */
4215 iv_ca_delta_commit (data, ivs, best_delta, true);
4216 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4217 iv_ca_delta_commit (data, ivs, best_delta, false);
4218 *delta = iv_ca_delta_join (best_delta, *delta);
4219 return best_cost;
4222 /* Tries to extend the sets IVS in the best possible way in order
4223 to express the USE. */
4225 static bool
4226 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4227 struct iv_use *use)
4229 unsigned best_cost, act_cost;
4230 unsigned i;
4231 bitmap_iterator bi;
4232 struct iv_cand *cand;
4233 struct iv_ca_delta *best_delta = NULL, *act_delta;
4234 struct cost_pair *cp;
4236 iv_ca_add_use (data, ivs, use);
4237 best_cost = iv_ca_cost (ivs);
4239 cp = iv_ca_cand_for_use (ivs, use);
4240 if (cp)
4242 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4243 iv_ca_set_no_cp (data, ivs, use);
4246 /* First try important candidates. Only if it fails, try the specific ones.
4247 Rationale -- in loops with many variables the best choice often is to use
4248 just one generic biv. If we added here many ivs specific to the uses,
4249 the optimization algorithm later would be likely to get stuck in a local
4250 minimum, thus causing us to create too many ivs. The approach from
4251 few ivs to more seems more likely to be successful -- starting from few
4252 ivs, replacing an expensive use by a specific iv should always be a
4253 win. */
4254 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4256 cand = iv_cand (data, i);
4258 if (iv_ca_cand_used_p (ivs, cand))
4259 continue;
4261 cp = get_use_iv_cost (data, use, cand);
4262 if (!cp)
4263 continue;
4265 iv_ca_set_cp (data, ivs, use, cp);
4266 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4267 iv_ca_set_no_cp (data, ivs, use);
4268 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4270 if (act_cost < best_cost)
4272 best_cost = act_cost;
4274 iv_ca_delta_free (&best_delta);
4275 best_delta = act_delta;
4277 else
4278 iv_ca_delta_free (&act_delta);
4281 if (best_cost == INFTY)
4283 for (i = 0; i < use->n_map_members; i++)
4285 cp = use->cost_map + i;
4286 cand = cp->cand;
4287 if (!cand)
4288 continue;
4290 /* Already tried this. */
4291 if (cand->important)
4292 continue;
4294 if (iv_ca_cand_used_p (ivs, cand))
4295 continue;
4297 act_delta = NULL;
4298 iv_ca_set_cp (data, ivs, use, cp);
4299 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4300 iv_ca_set_no_cp (data, ivs, use);
4301 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4302 cp, act_delta);
4304 if (act_cost < best_cost)
4306 best_cost = act_cost;
4308 if (best_delta)
4309 iv_ca_delta_free (&best_delta);
4310 best_delta = act_delta;
4312 else
4313 iv_ca_delta_free (&act_delta);
4317 iv_ca_delta_commit (data, ivs, best_delta, true);
4318 iv_ca_delta_free (&best_delta);
4320 return (best_cost != INFTY);
4323 /* Finds an initial assignment of candidates to uses. */
4325 static struct iv_ca *
4326 get_initial_solution (struct ivopts_data *data)
4328 struct iv_ca *ivs = iv_ca_new (data);
4329 unsigned i;
4331 for (i = 0; i < n_iv_uses (data); i++)
4332 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4334 iv_ca_free (&ivs);
4335 return NULL;
4338 return ivs;
4341 /* Tries to improve set of induction variables IVS. */
4343 static bool
4344 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4346 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4347 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4348 struct iv_cand *cand;
4350 /* Try extending the set of induction variables by one. */
4351 for (i = 0; i < n_iv_cands (data); i++)
4353 cand = iv_cand (data, i);
4355 if (iv_ca_cand_used_p (ivs, cand))
4356 continue;
4358 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4359 if (!act_delta)
4360 continue;
4362 /* If we successfully added the candidate and the set is small enough,
4363 try optimizing it by removing other candidates. */
4364 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4366 iv_ca_delta_commit (data, ivs, act_delta, true);
4367 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4368 iv_ca_delta_commit (data, ivs, act_delta, false);
4369 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4372 if (acost < best_cost)
4374 best_cost = acost;
4375 iv_ca_delta_free (&best_delta);
4376 best_delta = act_delta;
4378 else
4379 iv_ca_delta_free (&act_delta);
4382 if (!best_delta)
4384 /* Try removing the candidates from the set instead. */
4385 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4387 /* Nothing more we can do. */
4388 if (!best_delta)
4389 return false;
4392 iv_ca_delta_commit (data, ivs, best_delta, true);
4393 gcc_assert (best_cost == iv_ca_cost (ivs));
4394 iv_ca_delta_free (&best_delta);
4395 return true;
4398 /* Attempts to find the optimal set of induction variables. We do simple
4399 greedy heuristic -- we try to replace at most one candidate in the selected
4400 solution and remove the unused ivs while this improves the cost. */
4402 static struct iv_ca *
4403 find_optimal_iv_set (struct ivopts_data *data)
4405 unsigned i;
4406 struct iv_ca *set;
4407 struct iv_use *use;
4409 /* Get the initial solution. */
4410 set = get_initial_solution (data);
4411 if (!set)
4413 if (dump_file && (dump_flags & TDF_DETAILS))
4414 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4415 return NULL;
4418 if (dump_file && (dump_flags & TDF_DETAILS))
4420 fprintf (dump_file, "Initial set of candidates:\n");
4421 iv_ca_dump (data, dump_file, set);
4424 while (try_improve_iv_set (data, set))
4426 if (dump_file && (dump_flags & TDF_DETAILS))
4428 fprintf (dump_file, "Improved to:\n");
4429 iv_ca_dump (data, dump_file, set);
4433 if (dump_file && (dump_flags & TDF_DETAILS))
4434 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
4436 for (i = 0; i < n_iv_uses (data); i++)
4438 use = iv_use (data, i);
4439 use->selected = iv_ca_cand_for_use (set, use)->cand;
4442 return set;
4445 /* Creates a new induction variable corresponding to CAND. */
4447 static void
4448 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4450 block_stmt_iterator incr_pos;
4451 tree base;
4452 bool after = false;
4454 if (!cand->iv)
4455 return;
4457 switch (cand->pos)
4459 case IP_NORMAL:
4460 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4461 break;
4463 case IP_END:
4464 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4465 after = true;
4466 break;
4468 case IP_ORIGINAL:
4469 /* Mark that the iv is preserved. */
4470 name_info (data, cand->var_before)->preserve_biv = true;
4471 name_info (data, cand->var_after)->preserve_biv = true;
4473 /* Rewrite the increment so that it uses var_before directly. */
4474 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4476 return;
4479 gimple_add_tmp_var (cand->var_before);
4480 add_referenced_tmp_var (cand->var_before);
4482 base = unshare_expr (cand->iv->base);
4484 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
4485 &incr_pos, after, &cand->var_before, &cand->var_after);
4488 /* Creates new induction variables described in SET. */
4490 static void
4491 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4493 unsigned i;
4494 struct iv_cand *cand;
4495 bitmap_iterator bi;
4497 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4499 cand = iv_cand (data, i);
4500 create_new_iv (data, cand);
4504 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4505 is true, remove also the ssa name defined by the statement. */
4507 static void
4508 remove_statement (tree stmt, bool including_defined_name)
4510 if (TREE_CODE (stmt) == PHI_NODE)
4512 if (!including_defined_name)
4514 /* Prevent the ssa name defined by the statement from being removed. */
4515 SET_PHI_RESULT (stmt, NULL);
4517 remove_phi_node (stmt, NULL_TREE, bb_for_stmt (stmt));
4519 else
4521 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4523 bsi_remove (&bsi);
4527 /* Rewrites USE (definition of iv used in a nonlinear expression)
4528 using candidate CAND. */
4530 static void
4531 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4532 struct iv_use *use, struct iv_cand *cand)
4534 tree comp = unshare_expr (get_computation (data->current_loop,
4535 use, cand));
4536 tree op, stmts, tgt, ass;
4537 block_stmt_iterator bsi, pbsi;
4539 switch (TREE_CODE (use->stmt))
4541 case PHI_NODE:
4542 tgt = PHI_RESULT (use->stmt);
4544 /* If we should keep the biv, do not replace it. */
4545 if (name_info (data, tgt)->preserve_biv)
4546 return;
4548 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4549 while (!bsi_end_p (pbsi)
4550 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4552 bsi = pbsi;
4553 bsi_next (&pbsi);
4555 break;
4557 case MODIFY_EXPR:
4558 tgt = TREE_OPERAND (use->stmt, 0);
4559 bsi = bsi_for_stmt (use->stmt);
4560 break;
4562 default:
4563 gcc_unreachable ();
4566 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4568 if (TREE_CODE (use->stmt) == PHI_NODE)
4570 if (stmts)
4571 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4572 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
4573 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4574 remove_statement (use->stmt, false);
4575 SSA_NAME_DEF_STMT (tgt) = ass;
4577 else
4579 if (stmts)
4580 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4581 TREE_OPERAND (use->stmt, 1) = op;
4585 /* Replaces ssa name in index IDX by its basic variable. Callback for
4586 for_each_index. */
4588 static bool
4589 idx_remove_ssa_names (tree base, tree *idx,
4590 void *data ATTRIBUTE_UNUSED)
4592 tree *op;
4594 if (TREE_CODE (*idx) == SSA_NAME)
4595 *idx = SSA_NAME_VAR (*idx);
4597 if (TREE_CODE (base) == ARRAY_REF)
4599 op = &TREE_OPERAND (base, 2);
4600 if (*op
4601 && TREE_CODE (*op) == SSA_NAME)
4602 *op = SSA_NAME_VAR (*op);
4603 op = &TREE_OPERAND (base, 3);
4604 if (*op
4605 && TREE_CODE (*op) == SSA_NAME)
4606 *op = SSA_NAME_VAR (*op);
4609 return true;
4612 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4614 static tree
4615 unshare_and_remove_ssa_names (tree ref)
4617 ref = unshare_expr (ref);
4618 for_each_index (&ref, idx_remove_ssa_names, NULL);
4620 return ref;
4623 /* Rewrites base of memory access OP with expression WITH in statement
4624 pointed to by BSI. */
4626 static void
4627 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
4629 tree bvar, var, new_var, new_name, copy, name;
4630 tree orig;
4632 var = bvar = get_base_address (*op);
4634 if (!var || TREE_CODE (with) != SSA_NAME)
4635 goto do_rewrite;
4637 gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
4638 gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
4639 if (TREE_CODE (var) == INDIRECT_REF)
4640 var = TREE_OPERAND (var, 0);
4641 if (TREE_CODE (var) == SSA_NAME)
4643 name = var;
4644 var = SSA_NAME_VAR (var);
4646 else if (DECL_P (var))
4647 name = NULL_TREE;
4648 else
4649 goto do_rewrite;
4651 if (var_ann (var)->type_mem_tag)
4652 var = var_ann (var)->type_mem_tag;
4654 /* We need to add a memory tag for the variable. But we do not want
4655 to add it to the temporary used for the computations, since this leads
4656 to problems in redundancy elimination when there are common parts
4657 in two computations referring to the different arrays. So we copy
4658 the variable to a new temporary. */
4659 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
4660 if (name)
4661 new_name = duplicate_ssa_name (name, copy);
4662 else
4664 new_var = create_tmp_var (TREE_TYPE (with), "ruatmp");
4665 add_referenced_tmp_var (new_var);
4666 var_ann (new_var)->type_mem_tag = var;
4667 new_name = make_ssa_name (new_var, copy);
4669 TREE_OPERAND (copy, 0) = new_name;
4670 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
4671 with = new_name;
4673 do_rewrite:
4675 orig = NULL_TREE;
4676 gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
4677 gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
4679 if (TREE_CODE (*op) == INDIRECT_REF)
4680 orig = REF_ORIGINAL (*op);
4681 if (!orig)
4682 orig = unshare_and_remove_ssa_names (*op);
4684 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
4686 /* Record the original reference, for purposes of alias analysis. */
4687 REF_ORIGINAL (*op) = orig;
4690 /* Rewrites USE (address that is an iv) using candidate CAND. */
4692 static void
4693 rewrite_use_address (struct ivopts_data *data,
4694 struct iv_use *use, struct iv_cand *cand)
4696 tree comp = unshare_expr (get_computation (data->current_loop,
4697 use, cand));
4698 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4699 tree stmts;
4700 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
4702 if (stmts)
4703 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4705 rewrite_address_base (&bsi, use->op_p, op);
4708 /* Rewrites USE (the condition such that one of the arguments is an iv) using
4709 candidate CAND. */
4711 static void
4712 rewrite_use_compare (struct ivopts_data *data,
4713 struct iv_use *use, struct iv_cand *cand)
4715 tree comp;
4716 tree *op_p, cond, op, stmts, bound;
4717 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4718 enum tree_code compare;
4720 if (may_eliminate_iv (data->current_loop,
4721 use, cand, &compare, &bound))
4723 op = force_gimple_operand (unshare_expr (bound), &stmts,
4724 true, NULL_TREE);
4726 if (stmts)
4727 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4729 *use->op_p = build2 (compare, boolean_type_node,
4730 var_at_stmt (data->current_loop,
4731 cand, use->stmt), op);
4732 modify_stmt (use->stmt);
4733 return;
4736 /* The induction variable elimination failed; just express the original
4737 giv. */
4738 comp = unshare_expr (get_computation (data->current_loop, use, cand));
4740 cond = *use->op_p;
4741 op_p = &TREE_OPERAND (cond, 0);
4742 if (TREE_CODE (*op_p) != SSA_NAME
4743 || zero_p (get_iv (data, *op_p)->step))
4744 op_p = &TREE_OPERAND (cond, 1);
4746 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
4747 if (stmts)
4748 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4750 *op_p = op;
4753 /* Ensure that operand *OP_P may be used at the end of EXIT without
4754 violating loop closed ssa form. */
4756 static void
4757 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
4759 basic_block def_bb;
4760 struct loop *def_loop;
4761 tree phi, use;
4763 use = USE_FROM_PTR (op_p);
4764 if (TREE_CODE (use) != SSA_NAME)
4765 return;
4767 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
4768 if (!def_bb)
4769 return;
4771 def_loop = def_bb->loop_father;
4772 if (flow_bb_inside_loop_p (def_loop, exit->dest))
4773 return;
4775 /* Try finding a phi node that copies the value out of the loop. */
4776 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
4777 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
4778 break;
4780 if (!phi)
4782 /* Create such a phi node. */
4783 tree new_name = duplicate_ssa_name (use, NULL);
4785 phi = create_phi_node (new_name, exit->dest);
4786 SSA_NAME_DEF_STMT (new_name) = phi;
4787 add_phi_arg (phi, use, exit);
4790 SET_USE (op_p, PHI_RESULT (phi));
4793 /* Ensure that operands of STMT may be used at the end of EXIT without
4794 violating loop closed ssa form. */
4796 static void
4797 protect_loop_closed_ssa_form (edge exit, tree stmt)
4799 use_optype uses;
4800 vuse_optype vuses;
4801 v_may_def_optype v_may_defs;
4802 unsigned i;
4804 get_stmt_operands (stmt);
4806 uses = STMT_USE_OPS (stmt);
4807 for (i = 0; i < NUM_USES (uses); i++)
4808 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4810 vuses = STMT_VUSE_OPS (stmt);
4811 for (i = 0; i < NUM_VUSES (vuses); i++)
4812 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
4814 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4815 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
4816 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
4819 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
4820 so that they are emitted on the correct place, and so that the loop closed
4821 ssa form is preserved. */
4823 static void
4824 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
4826 tree_stmt_iterator tsi;
4827 block_stmt_iterator bsi;
4828 tree phi, stmt, def, next;
4830 if (EDGE_COUNT (exit->dest->preds) > 1)
4831 split_loop_exit_edge (exit);
4833 if (TREE_CODE (stmts) == STATEMENT_LIST)
4835 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
4836 protect_loop_closed_ssa_form (exit, tsi_stmt (tsi));
4838 else
4839 protect_loop_closed_ssa_form (exit, stmts);
4841 /* Ensure there is label in exit->dest, so that we can
4842 insert after it. */
4843 tree_block_label (exit->dest);
4844 bsi = bsi_after_labels (exit->dest);
4845 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4847 if (!op)
4848 return;
4850 for (phi = phi_nodes (exit->dest); phi; phi = next)
4852 next = PHI_CHAIN (phi);
4854 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
4856 def = PHI_RESULT (phi);
4857 remove_statement (phi, false);
4858 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
4859 def, op);
4860 SSA_NAME_DEF_STMT (def) = stmt;
4861 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
4866 /* Rewrites the final value of USE (that is only needed outside of the loop)
4867 using candidate CAND. */
4869 static void
4870 rewrite_use_outer (struct ivopts_data *data,
4871 struct iv_use *use, struct iv_cand *cand)
4873 edge exit;
4874 tree value, op, stmts, tgt;
4875 tree phi;
4877 switch (TREE_CODE (use->stmt))
4879 case PHI_NODE:
4880 tgt = PHI_RESULT (use->stmt);
4881 break;
4882 case MODIFY_EXPR:
4883 tgt = TREE_OPERAND (use->stmt, 0);
4884 break;
4885 default:
4886 gcc_unreachable ();
4889 exit = single_dom_exit (data->current_loop);
4891 if (exit)
4893 if (!cand->iv)
4895 bool ok = may_replace_final_value (data->current_loop, use, &value);
4896 gcc_assert (ok);
4898 else
4899 value = get_computation_at (data->current_loop,
4900 use, cand, last_stmt (exit->src));
4902 value = unshare_expr (value);
4903 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
4905 /* If we will preserve the iv anyway and we would need to perform
4906 some computation to replace the final value, do nothing. */
4907 if (stmts && name_info (data, tgt)->preserve_biv)
4908 return;
4910 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
4912 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
4914 if (USE_FROM_PTR (use_p) == tgt)
4915 SET_USE (use_p, op);
4918 if (stmts)
4919 compute_phi_arg_on_exit (exit, stmts, op);
4921 /* Enable removal of the statement. We cannot remove it directly,
4922 since we may still need the aliasing information attached to the
4923 ssa name defined by it. */
4924 name_info (data, tgt)->iv->have_use_for = false;
4925 return;
4928 /* If the variable is going to be preserved anyway, there is nothing to
4929 do. */
4930 if (name_info (data, tgt)->preserve_biv)
4931 return;
4933 /* Otherwise we just need to compute the iv. */
4934 rewrite_use_nonlinear_expr (data, use, cand);
4937 /* Rewrites USE using candidate CAND. */
4939 static void
4940 rewrite_use (struct ivopts_data *data,
4941 struct iv_use *use, struct iv_cand *cand)
4943 switch (use->type)
4945 case USE_NONLINEAR_EXPR:
4946 rewrite_use_nonlinear_expr (data, use, cand);
4947 break;
4949 case USE_OUTER:
4950 rewrite_use_outer (data, use, cand);
4951 break;
4953 case USE_ADDRESS:
4954 rewrite_use_address (data, use, cand);
4955 break;
4957 case USE_COMPARE:
4958 rewrite_use_compare (data, use, cand);
4959 break;
4961 default:
4962 gcc_unreachable ();
4964 modify_stmt (use->stmt);
4967 /* Rewrite the uses using the selected induction variables. */
4969 static void
4970 rewrite_uses (struct ivopts_data *data)
4972 unsigned i;
4973 struct iv_cand *cand;
4974 struct iv_use *use;
4976 for (i = 0; i < n_iv_uses (data); i++)
4978 use = iv_use (data, i);
4979 cand = use->selected;
4980 gcc_assert (cand);
4982 rewrite_use (data, use, cand);
4986 /* Removes the ivs that are not used after rewriting. */
4988 static void
4989 remove_unused_ivs (struct ivopts_data *data)
4991 unsigned j;
4992 bitmap_iterator bi;
4994 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4996 struct version_info *info;
4998 info = ver_info (data, j);
4999 if (info->iv
5000 && !zero_p (info->iv->step)
5001 && !info->inv_id
5002 && !info->iv->have_use_for
5003 && !info->preserve_biv)
5004 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5008 /* Frees data allocated by the optimization of a single loop. */
5010 static void
5011 free_loop_data (struct ivopts_data *data)
5013 unsigned i, j;
5014 bitmap_iterator bi;
5016 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5018 struct version_info *info;
5020 info = ver_info (data, i);
5021 if (info->iv)
5022 free (info->iv);
5023 info->iv = NULL;
5024 info->has_nonlin_use = false;
5025 info->preserve_biv = false;
5026 info->inv_id = 0;
5028 bitmap_clear (data->relevant);
5029 bitmap_clear (data->important_candidates);
5031 for (i = 0; i < n_iv_uses (data); i++)
5033 struct iv_use *use = iv_use (data, i);
5035 free (use->iv);
5036 BITMAP_XFREE (use->related_cands);
5037 for (j = 0; j < use->n_map_members; j++)
5038 if (use->cost_map[j].depends_on)
5039 BITMAP_XFREE (use->cost_map[j].depends_on);
5040 free (use->cost_map);
5041 free (use);
5043 VARRAY_POP_ALL (data->iv_uses);
5045 for (i = 0; i < n_iv_cands (data); i++)
5047 struct iv_cand *cand = iv_cand (data, i);
5049 if (cand->iv)
5050 free (cand->iv);
5051 free (cand);
5053 VARRAY_POP_ALL (data->iv_candidates);
5055 if (data->version_info_size < num_ssa_names)
5057 data->version_info_size = 2 * num_ssa_names;
5058 free (data->version_info);
5059 data->version_info = xcalloc (data->version_info_size,
5060 sizeof (struct version_info));
5063 data->max_inv_id = 0;
5065 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
5067 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
5069 SET_DECL_RTL (obj, NULL_RTX);
5071 VARRAY_POP_ALL (decl_rtl_to_reset);
5074 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5075 loop tree. */
5077 static void
5078 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5080 unsigned i;
5082 for (i = 1; i < loops->num; i++)
5083 if (loops->parray[i])
5085 free (loops->parray[i]->aux);
5086 loops->parray[i]->aux = NULL;
5089 free_loop_data (data);
5090 free (data->version_info);
5091 BITMAP_XFREE (data->relevant);
5092 BITMAP_XFREE (data->important_candidates);
5094 VARRAY_FREE (decl_rtl_to_reset);
5095 VARRAY_FREE (data->iv_uses);
5096 VARRAY_FREE (data->iv_candidates);
5099 /* Optimizes the LOOP. Returns true if anything changed. */
5101 static bool
5102 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5104 bool changed = false;
5105 struct iv_ca *iv_ca;
5106 edge exit;
5108 data->current_loop = loop;
5110 if (dump_file && (dump_flags & TDF_DETAILS))
5112 fprintf (dump_file, "Processing loop %d\n", loop->num);
5114 exit = single_dom_exit (loop);
5115 if (exit)
5117 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5118 exit->src->index, exit->dest->index);
5119 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5120 fprintf (dump_file, "\n");
5123 fprintf (dump_file, "\n");
5126 /* For each ssa name determines whether it behaves as an induction variable
5127 in some loop. */
5128 if (!find_induction_variables (data))
5129 goto finish;
5131 /* Finds interesting uses (item 1). */
5132 find_interesting_uses (data);
5133 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5134 goto finish;
5136 /* Finds candidates for the induction variables (item 2). */
5137 find_iv_candidates (data);
5139 /* Calculates the costs (item 3, part 1). */
5140 determine_use_iv_costs (data);
5141 determine_iv_costs (data);
5142 determine_set_costs (data);
5144 /* Find the optimal set of induction variables (item 3, part 2). */
5145 iv_ca = find_optimal_iv_set (data);
5146 if (!iv_ca)
5147 goto finish;
5148 changed = true;
5150 /* Create the new induction variables (item 4, part 1). */
5151 create_new_ivs (data, iv_ca);
5152 iv_ca_free (&iv_ca);
5154 /* Rewrite the uses (item 4, part 2). */
5155 rewrite_uses (data);
5157 /* Remove the ivs that are unused after rewriting. */
5158 remove_unused_ivs (data);
5160 loop_commit_inserts ();
5162 /* We have changed the structure of induction variables; it might happen
5163 that definitions in the scev database refer to some of them that were
5164 eliminated. */
5165 scev_reset ();
5167 finish:
5168 free_loop_data (data);
5170 return changed;
5173 /* Main entry point. Optimizes induction variables in LOOPS. */
5175 void
5176 tree_ssa_iv_optimize (struct loops *loops)
5178 struct loop *loop;
5179 struct ivopts_data data;
5181 tree_ssa_iv_optimize_init (loops, &data);
5183 /* Optimize the loops starting with the innermost ones. */
5184 loop = loops->tree_root;
5185 while (loop->inner)
5186 loop = loop->inner;
5188 #ifdef ENABLE_CHECKING
5189 verify_loop_closed_ssa ();
5190 verify_stmts ();
5191 #endif
5193 /* Scan the loops, inner ones first. */
5194 while (loop != loops->tree_root)
5196 if (dump_file && (dump_flags & TDF_DETAILS))
5197 flow_loop_dump (loop, dump_file, NULL, 1);
5199 tree_ssa_iv_optimize_loop (&data, loop);
5201 if (loop->next)
5203 loop = loop->next;
5204 while (loop->inner)
5205 loop = loop->inner;
5207 else
5208 loop = loop->outer;
5211 #ifdef ENABLE_CHECKING
5212 verify_loop_closed_ssa ();
5213 verify_stmts ();
5214 #endif
5216 tree_ssa_iv_optimize_finalize (loops, &data);