PR target/16201
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blob3288080ac52873f096cbe953d832394dcffd097e
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
92 /* The infinite cost. */
93 #define INFTY 10000000
95 /* The expected number of loop iterations. TODO -- use profiling instead of
96 this. */
97 #define AVG_LOOP_NITER(LOOP) 5
100 /* Representation of the induction variable. */
101 struct iv
103 tree base; /* Initial value of the iv. */
104 tree base_object; /* A memory object to that the induction variable points. */
105 tree step; /* Step of the iv (constant only). */
106 tree ssa_name; /* The ssa name with the value. */
107 bool biv_p; /* Is it a biv? */
108 bool have_use_for; /* Do we already have a use for it? */
109 unsigned use_id; /* The identifier in the use if it is the case. */
112 /* Per-ssa version information (induction variable descriptions, etc.). */
113 struct version_info
115 tree name; /* The ssa name. */
116 struct iv *iv; /* Induction variable description. */
117 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
118 an expression that is not an induction variable. */
119 unsigned inv_id; /* Id of an invariant. */
120 bool preserve_biv; /* For the original biv, whether to preserve it. */
123 /* Information attached to loop. */
124 struct loop_data
126 struct tree_niter_desc niter;
127 /* Number of iterations. */
129 unsigned regs_used; /* Number of registers used. */
132 /* Types of uses. */
133 enum use_type
135 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
136 USE_OUTER, /* The induction variable is used outside the loop. */
137 USE_ADDRESS, /* Use in an address. */
138 USE_COMPARE /* Use is a compare. */
141 /* The candidate - cost pair. */
142 struct cost_pair
144 struct iv_cand *cand; /* The candidate. */
145 unsigned cost; /* The cost. */
146 bitmap depends_on; /* The list of invariants that have to be
147 preserved. */
150 /* Use. */
151 struct iv_use
153 unsigned id; /* The id of the use. */
154 enum use_type type; /* Type of the use. */
155 struct iv *iv; /* The induction variable it is based on. */
156 tree stmt; /* Statement in that it occurs. */
157 tree *op_p; /* The place where it occurs. */
158 bitmap related_cands; /* The set of "related" iv candidates, plus the common
159 important ones. */
161 unsigned n_map_members; /* Number of candidates in the cost_map list. */
162 struct cost_pair *cost_map;
163 /* The costs wrto the iv candidates. */
165 struct iv_cand *selected;
166 /* The selected candidate. */
169 /* The position where the iv is computed. */
170 enum iv_position
172 IP_NORMAL, /* At the end, just before the exit condition. */
173 IP_END, /* At the end of the latch block. */
174 IP_ORIGINAL /* The original biv. */
177 /* The induction variable candidate. */
178 struct iv_cand
180 unsigned id; /* The number of the candidate. */
181 bool important; /* Whether this is an "important" candidate, i.e. such
182 that it should be considered by all uses. */
183 enum iv_position pos; /* Where it is computed. */
184 tree incremented_at; /* For original biv, the statement where it is
185 incremented. */
186 tree var_before; /* The variable used for it before increment. */
187 tree var_after; /* The variable used for it after increment. */
188 struct iv *iv; /* The value of the candidate. NULL for
189 "pseudocandidate" used to indicate the possibility
190 to replace the final value of an iv by direct
191 computation of the value. */
192 unsigned cost; /* Cost of the candidate. */
195 /* The data used by the induction variable optimizations. */
197 struct ivopts_data
199 /* The currently optimized loop. */
200 struct loop *current_loop;
202 /* The size of version_info array allocated. */
203 unsigned version_info_size;
205 /* The array of information for the ssa names. */
206 struct version_info *version_info;
208 /* The bitmap of indices in version_info whose value was changed. */
209 bitmap relevant;
211 /* The maximum invariant id. */
212 unsigned max_inv_id;
214 /* The uses of induction variables. */
215 varray_type iv_uses;
217 /* The candidates. */
218 varray_type iv_candidates;
220 /* A bitmap of important candidates. */
221 bitmap important_candidates;
223 /* Whether to consider just related and important candidates when replacing a
224 use. */
225 bool consider_all_candidates;
228 /* An assignment of iv candidates to uses. */
230 struct iv_ca
232 /* The number of uses covered by the assignment. */
233 unsigned upto;
235 /* Number of uses that cannot be expressed by the candidates in the set. */
236 unsigned bad_uses;
238 /* Candidate assigned to a use, together with the related costs. */
239 struct cost_pair **cand_for_use;
241 /* Number of times each candidate is used. */
242 unsigned *n_cand_uses;
244 /* The candidates used. */
245 bitmap cands;
247 /* The number of candidates in the set. */
248 unsigned n_cands;
250 /* Total number of registers needed. */
251 unsigned n_regs;
253 /* Total cost of expressing uses. */
254 unsigned cand_use_cost;
256 /* Total cost of candidates. */
257 unsigned cand_cost;
259 /* Number of times each invariant is used. */
260 unsigned *n_invariant_uses;
262 /* Total cost of the assignment. */
263 unsigned cost;
266 /* Difference of two iv candidate assignments. */
268 struct iv_ca_delta
270 /* Changed use. */
271 struct iv_use *use;
273 /* An old assignment (for rollback purposes). */
274 struct cost_pair *old_cp;
276 /* A new assignment. */
277 struct cost_pair *new_cp;
279 /* Next change in the list. */
280 struct iv_ca_delta *next_change;
283 /* Bound on number of candidates below that all candidates are considered. */
285 #define CONSIDER_ALL_CANDIDATES_BOUND \
286 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
288 /* If there are more iv occurrences, we just give up (it is quite unlikely that
289 optimizing such a loop would help, and it would take ages). */
291 #define MAX_CONSIDERED_USES \
292 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
294 /* If there are at most this number of ivs in the set, try removing unnecessary
295 ivs from the set always. */
297 #define ALWAYS_PRUNE_CAND_SET_BOUND \
298 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
300 /* The list of trees for that the decl_rtl field must be reset is stored
301 here. */
303 static varray_type decl_rtl_to_reset;
305 /* Number of uses recorded in DATA. */
307 static inline unsigned
308 n_iv_uses (struct ivopts_data *data)
310 return VARRAY_ACTIVE_SIZE (data->iv_uses);
313 /* Ith use recorded in DATA. */
315 static inline struct iv_use *
316 iv_use (struct ivopts_data *data, unsigned i)
318 return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
321 /* Number of candidates recorded in DATA. */
323 static inline unsigned
324 n_iv_cands (struct ivopts_data *data)
326 return VARRAY_ACTIVE_SIZE (data->iv_candidates);
329 /* Ith candidate recorded in DATA. */
331 static inline struct iv_cand *
332 iv_cand (struct ivopts_data *data, unsigned i)
334 return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
337 /* The data for LOOP. */
339 static inline struct loop_data *
340 loop_data (struct loop *loop)
342 return loop->aux;
345 /* The single loop exit if it dominates the latch, NULL otherwise. */
347 static edge
348 single_dom_exit (struct loop *loop)
350 edge exit = loop->single_exit;
352 if (!exit)
353 return NULL;
355 if (!just_once_each_iteration_p (loop, exit->src))
356 return NULL;
358 return exit;
361 /* Dumps information about the induction variable IV to FILE. */
363 extern void dump_iv (FILE *, struct iv *);
364 void
365 dump_iv (FILE *file, struct iv *iv)
367 if (iv->ssa_name)
369 fprintf (file, "ssa name ");
370 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
371 fprintf (file, "\n");
374 fprintf (file, " type ");
375 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
376 fprintf (file, "\n");
378 if (iv->step)
380 fprintf (file, " base ");
381 print_generic_expr (file, iv->base, TDF_SLIM);
382 fprintf (file, "\n");
384 fprintf (file, " step ");
385 print_generic_expr (file, iv->step, TDF_SLIM);
386 fprintf (file, "\n");
388 else
390 fprintf (file, " invariant ");
391 print_generic_expr (file, iv->base, TDF_SLIM);
392 fprintf (file, "\n");
395 if (iv->base_object)
397 fprintf (file, " base object ");
398 print_generic_expr (file, iv->base_object, TDF_SLIM);
399 fprintf (file, "\n");
402 if (iv->biv_p)
403 fprintf (file, " is a biv\n");
406 /* Dumps information about the USE to FILE. */
408 extern void dump_use (FILE *, struct iv_use *);
409 void
410 dump_use (FILE *file, struct iv_use *use)
412 fprintf (file, "use %d\n", use->id);
414 switch (use->type)
416 case USE_NONLINEAR_EXPR:
417 fprintf (file, " generic\n");
418 break;
420 case USE_OUTER:
421 fprintf (file, " outside\n");
422 break;
424 case USE_ADDRESS:
425 fprintf (file, " address\n");
426 break;
428 case USE_COMPARE:
429 fprintf (file, " compare\n");
430 break;
432 default:
433 gcc_unreachable ();
436 fprintf (file, " in statement ");
437 print_generic_expr (file, use->stmt, TDF_SLIM);
438 fprintf (file, "\n");
440 fprintf (file, " at position ");
441 if (use->op_p)
442 print_generic_expr (file, *use->op_p, TDF_SLIM);
443 fprintf (file, "\n");
445 dump_iv (file, use->iv);
447 if (use->related_cands)
449 fprintf (file, " related candidates ");
450 dump_bitmap (file, use->related_cands);
454 /* Dumps information about the uses to FILE. */
456 extern void dump_uses (FILE *, struct ivopts_data *);
457 void
458 dump_uses (FILE *file, struct ivopts_data *data)
460 unsigned i;
461 struct iv_use *use;
463 for (i = 0; i < n_iv_uses (data); i++)
465 use = iv_use (data, i);
467 dump_use (file, use);
468 fprintf (file, "\n");
472 /* Dumps information about induction variable candidate CAND to FILE. */
474 extern void dump_cand (FILE *, struct iv_cand *);
475 void
476 dump_cand (FILE *file, struct iv_cand *cand)
478 struct iv *iv = cand->iv;
480 fprintf (file, "candidate %d%s\n",
481 cand->id, cand->important ? " (important)" : "");
483 if (!iv)
485 fprintf (file, " final value replacement\n");
486 return;
489 switch (cand->pos)
491 case IP_NORMAL:
492 fprintf (file, " incremented before exit test\n");
493 break;
495 case IP_END:
496 fprintf (file, " incremented at end\n");
497 break;
499 case IP_ORIGINAL:
500 fprintf (file, " original biv\n");
501 break;
504 dump_iv (file, iv);
507 /* Returns the info for ssa version VER. */
509 static inline struct version_info *
510 ver_info (struct ivopts_data *data, unsigned ver)
512 return data->version_info + ver;
515 /* Returns the info for ssa name NAME. */
517 static inline struct version_info *
518 name_info (struct ivopts_data *data, tree name)
520 return ver_info (data, SSA_NAME_VERSION (name));
523 /* Checks whether there exists number X such that X * B = A, counting modulo
524 2^BITS. */
526 static bool
527 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
528 HOST_WIDE_INT *x)
530 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
531 unsigned HOST_WIDE_INT inv, ex, val;
532 unsigned i;
534 a &= mask;
535 b &= mask;
537 /* First divide the whole equation by 2 as long as possible. */
538 while (!(a & 1) && !(b & 1))
540 a >>= 1;
541 b >>= 1;
542 bits--;
543 mask >>= 1;
546 if (!(b & 1))
548 /* If b is still even, a is odd and there is no such x. */
549 return false;
552 /* Find the inverse of b. We compute it as
553 b^(2^(bits - 1) - 1) (mod 2^bits). */
554 inv = 1;
555 ex = b;
556 for (i = 0; i < bits - 1; i++)
558 inv = (inv * ex) & mask;
559 ex = (ex * ex) & mask;
562 val = (a * inv) & mask;
564 gcc_assert (((val * b) & mask) == a);
566 if ((val >> (bits - 1)) & 1)
567 val |= ~mask;
569 *x = val;
571 return true;
574 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
575 emitted in LOOP. */
577 static bool
578 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
580 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
582 gcc_assert (bb);
584 if (sbb == loop->latch)
585 return true;
587 if (sbb != bb)
588 return false;
590 return stmt == last_stmt (bb);
593 /* Returns true if STMT if after the place where the original induction
594 variable CAND is incremented. */
596 static bool
597 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
599 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
600 basic_block stmt_bb = bb_for_stmt (stmt);
601 block_stmt_iterator bsi;
603 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
604 return false;
606 if (stmt_bb != cand_bb)
607 return true;
609 /* Scan the block from the end, since the original ivs are usually
610 incremented at the end of the loop body. */
611 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
613 if (bsi_stmt (bsi) == cand->incremented_at)
614 return false;
615 if (bsi_stmt (bsi) == stmt)
616 return true;
620 /* Returns true if STMT if after the place where the induction variable
621 CAND is incremented in LOOP. */
623 static bool
624 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
626 switch (cand->pos)
628 case IP_END:
629 return false;
631 case IP_NORMAL:
632 return stmt_after_ip_normal_pos (loop, stmt);
634 case IP_ORIGINAL:
635 return stmt_after_ip_original_pos (cand, stmt);
637 default:
638 gcc_unreachable ();
642 /* Initializes data structures used by the iv optimization pass, stored
643 in DATA. LOOPS is the loop tree. */
645 static void
646 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
648 unsigned i;
650 data->version_info_size = 2 * num_ssa_names;
651 data->version_info = xcalloc (data->version_info_size,
652 sizeof (struct version_info));
653 data->relevant = BITMAP_XMALLOC ();
654 data->important_candidates = BITMAP_XMALLOC ();
655 data->max_inv_id = 0;
657 for (i = 1; i < loops->num; i++)
658 if (loops->parray[i])
659 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
661 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
662 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
663 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
666 /* Returns a memory object to that EXPR points. In case we are able to
667 determine that it does not point to any such object, NULL is returned. */
669 static tree
670 determine_base_object (tree expr)
672 enum tree_code code = TREE_CODE (expr);
673 tree base, obj, op0, op1;
675 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
676 return NULL_TREE;
678 switch (code)
680 case INTEGER_CST:
681 return NULL_TREE;
683 case ADDR_EXPR:
684 obj = TREE_OPERAND (expr, 0);
685 base = get_base_address (obj);
687 if (!base)
688 return fold_convert (ptr_type_node, expr);
690 if (TREE_CODE (base) == INDIRECT_REF)
691 return fold_convert (ptr_type_node, TREE_OPERAND (base, 0));
693 return fold (build1 (ADDR_EXPR, ptr_type_node, base));
695 case PLUS_EXPR:
696 case MINUS_EXPR:
697 op0 = determine_base_object (TREE_OPERAND (expr, 0));
698 op1 = determine_base_object (TREE_OPERAND (expr, 1));
700 if (!op1)
701 return op0;
703 if (!op0)
704 return (code == PLUS_EXPR
705 ? op1
706 : fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));
708 return fold (build (code, ptr_type_node, op0, op1));
710 default:
711 return fold_convert (ptr_type_node, expr);
715 /* Allocates an induction variable with given initial value BASE and step STEP
716 for loop LOOP. */
718 static struct iv *
719 alloc_iv (tree base, tree step)
721 struct iv *iv = xcalloc (1, sizeof (struct iv));
723 if (step && integer_zerop (step))
724 step = NULL_TREE;
726 iv->base = base;
727 iv->base_object = determine_base_object (base);
728 iv->step = step;
729 iv->biv_p = false;
730 iv->have_use_for = false;
731 iv->use_id = 0;
732 iv->ssa_name = NULL_TREE;
734 return iv;
737 /* Sets STEP and BASE for induction variable IV. */
739 static void
740 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
742 struct version_info *info = name_info (data, iv);
744 gcc_assert (!info->iv);
746 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
747 info->iv = alloc_iv (base, step);
748 info->iv->ssa_name = iv;
751 /* Finds induction variable declaration for VAR. */
753 static struct iv *
754 get_iv (struct ivopts_data *data, tree var)
756 basic_block bb;
758 if (!name_info (data, var)->iv)
760 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
762 if (!bb
763 || !flow_bb_inside_loop_p (data->current_loop, bb))
764 set_iv (data, var, var, NULL_TREE);
767 return name_info (data, var)->iv;
770 /* Determines the step of a biv defined in PHI. */
772 static tree
773 determine_biv_step (tree phi)
775 struct loop *loop = bb_for_stmt (phi)->loop_father;
776 tree name = PHI_RESULT (phi), base, step;
777 tree type = TREE_TYPE (name);
779 if (!is_gimple_reg (name))
780 return NULL_TREE;
782 if (!simple_iv (loop, phi, name, &base, &step))
783 return NULL_TREE;
785 if (!step)
786 return build_int_cst (type, 0);
788 return step;
791 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
793 static bool
794 abnormal_ssa_name_p (tree exp)
796 if (!exp)
797 return false;
799 if (TREE_CODE (exp) != SSA_NAME)
800 return false;
802 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
805 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
806 abnormal phi node. Callback for for_each_index. */
808 static bool
809 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
810 void *data ATTRIBUTE_UNUSED)
812 if (TREE_CODE (base) == ARRAY_REF)
814 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
815 return false;
816 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
817 return false;
820 return !abnormal_ssa_name_p (*index);
823 /* Returns true if EXPR contains a ssa name that occurs in an
824 abnormal phi node. */
826 static bool
827 contains_abnormal_ssa_name_p (tree expr)
829 enum tree_code code = TREE_CODE (expr);
830 enum tree_code_class class = TREE_CODE_CLASS (code);
832 if (code == SSA_NAME)
833 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
835 if (code == INTEGER_CST
836 || is_gimple_min_invariant (expr))
837 return false;
839 if (code == ADDR_EXPR)
840 return !for_each_index (&TREE_OPERAND (expr, 0),
841 idx_contains_abnormal_ssa_name_p,
842 NULL);
844 switch (class)
846 case tcc_binary:
847 case tcc_comparison:
848 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
849 return true;
851 /* Fallthru. */
852 case tcc_unary:
853 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
854 return true;
856 break;
858 default:
859 gcc_unreachable ();
862 return false;
865 /* Finds basic ivs. */
867 static bool
868 find_bivs (struct ivopts_data *data)
870 tree phi, step, type, base;
871 bool found = false;
872 struct loop *loop = data->current_loop;
874 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
876 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
877 continue;
879 step = determine_biv_step (phi);
881 if (!step)
882 continue;
883 if (cst_and_fits_in_hwi (step)
884 && int_cst_value (step) == 0)
885 continue;
887 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
888 if (contains_abnormal_ssa_name_p (base))
889 continue;
891 type = TREE_TYPE (PHI_RESULT (phi));
892 base = fold_convert (type, base);
893 step = fold_convert (type, step);
895 /* FIXME: We do not handle induction variables whose step does
896 not satisfy cst_and_fits_in_hwi. */
897 if (!cst_and_fits_in_hwi (step))
898 continue;
900 set_iv (data, PHI_RESULT (phi), base, step);
901 found = true;
904 return found;
907 /* Marks basic ivs. */
909 static void
910 mark_bivs (struct ivopts_data *data)
912 tree phi, var;
913 struct iv *iv, *incr_iv;
914 struct loop *loop = data->current_loop;
915 basic_block incr_bb;
917 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
919 iv = get_iv (data, PHI_RESULT (phi));
920 if (!iv)
921 continue;
923 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
924 incr_iv = get_iv (data, var);
925 if (!incr_iv)
926 continue;
928 /* If the increment is in the subloop, ignore it. */
929 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
930 if (incr_bb->loop_father != data->current_loop
931 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
932 continue;
934 iv->biv_p = true;
935 incr_iv->biv_p = true;
939 /* Checks whether STMT defines a linear induction variable and stores its
940 parameters to BASE and STEP. */
942 static bool
943 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
944 tree *base, tree *step)
946 tree lhs;
947 struct loop *loop = data->current_loop;
949 *base = NULL_TREE;
950 *step = NULL_TREE;
952 if (TREE_CODE (stmt) != MODIFY_EXPR)
953 return false;
955 lhs = TREE_OPERAND (stmt, 0);
956 if (TREE_CODE (lhs) != SSA_NAME)
957 return false;
959 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
960 return false;
962 /* FIXME: We do not handle induction variables whose step does
963 not satisfy cst_and_fits_in_hwi. */
964 if (!zero_p (*step)
965 && !cst_and_fits_in_hwi (*step))
966 return false;
968 if (contains_abnormal_ssa_name_p (*base))
969 return false;
971 return true;
974 /* Finds general ivs in statement STMT. */
976 static void
977 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
979 tree base, step;
981 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
982 return;
984 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
987 /* Finds general ivs in basic block BB. */
989 static void
990 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
992 block_stmt_iterator bsi;
994 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
995 find_givs_in_stmt (data, bsi_stmt (bsi));
998 /* Finds general ivs. */
1000 static void
1001 find_givs (struct ivopts_data *data)
1003 struct loop *loop = data->current_loop;
1004 basic_block *body = get_loop_body_in_dom_order (loop);
1005 unsigned i;
1007 for (i = 0; i < loop->num_nodes; i++)
1008 find_givs_in_bb (data, body[i]);
1009 free (body);
1012 /* Determine the number of iterations of the current loop. */
1014 static void
1015 determine_number_of_iterations (struct ivopts_data *data)
1017 struct loop *loop = data->current_loop;
1018 edge exit = single_dom_exit (loop);
1020 if (!exit)
1021 return;
1023 number_of_iterations_exit (loop, exit, &loop_data (loop)->niter);
1026 /* For each ssa name defined in LOOP determines whether it is an induction
1027 variable and if so, its initial value and step. */
1029 static bool
1030 find_induction_variables (struct ivopts_data *data)
1032 unsigned i;
1033 struct loop *loop = data->current_loop;
1034 bitmap_iterator bi;
1036 if (!find_bivs (data))
1037 return false;
1039 find_givs (data);
1040 mark_bivs (data);
1041 determine_number_of_iterations (data);
1043 if (dump_file && (dump_flags & TDF_DETAILS))
1045 if (loop_data (loop)->niter.niter)
1047 fprintf (dump_file, " number of iterations ");
1048 print_generic_expr (dump_file, loop_data (loop)->niter.niter,
1049 TDF_SLIM);
1050 fprintf (dump_file, "\n");
1052 fprintf (dump_file, " may be zero if ");
1053 print_generic_expr (dump_file, loop_data (loop)->niter.may_be_zero,
1054 TDF_SLIM);
1055 fprintf (dump_file, "\n");
1057 fprintf (dump_file, " bogus unless ");
1058 print_generic_expr (dump_file, loop_data (loop)->niter.assumptions,
1059 TDF_SLIM);
1060 fprintf (dump_file, "\n");
1061 fprintf (dump_file, "\n");
1064 fprintf (dump_file, "Induction variables:\n\n");
1066 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1068 if (ver_info (data, i)->iv)
1069 dump_iv (dump_file, ver_info (data, i)->iv);
1073 return true;
1076 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1078 static struct iv_use *
1079 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1080 tree stmt, enum use_type use_type)
1082 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1084 use->id = n_iv_uses (data);
1085 use->type = use_type;
1086 use->iv = iv;
1087 use->stmt = stmt;
1088 use->op_p = use_p;
1089 use->related_cands = BITMAP_XMALLOC ();
1091 /* To avoid showing ssa name in the dumps, if it was not reset by the
1092 caller. */
1093 iv->ssa_name = NULL_TREE;
1095 if (dump_file && (dump_flags & TDF_DETAILS))
1096 dump_use (dump_file, use);
1098 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
1100 return use;
1103 /* Checks whether OP is a loop-level invariant and if so, records it.
1104 NONLINEAR_USE is true if the invariant is used in a way we do not
1105 handle specially. */
1107 static void
1108 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1110 basic_block bb;
1111 struct version_info *info;
1113 if (TREE_CODE (op) != SSA_NAME
1114 || !is_gimple_reg (op))
1115 return;
1117 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1118 if (bb
1119 && flow_bb_inside_loop_p (data->current_loop, bb))
1120 return;
1122 info = name_info (data, op);
1123 info->name = op;
1124 info->has_nonlin_use |= nonlinear_use;
1125 if (!info->inv_id)
1126 info->inv_id = ++data->max_inv_id;
1127 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1130 /* Checks whether the use OP is interesting and if so, records it
1131 as TYPE. */
1133 static struct iv_use *
1134 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1135 enum use_type type)
1137 struct iv *iv;
1138 struct iv *civ;
1139 tree stmt;
1140 struct iv_use *use;
1142 if (TREE_CODE (op) != SSA_NAME)
1143 return NULL;
1145 iv = get_iv (data, op);
1146 if (!iv)
1147 return NULL;
1149 if (iv->have_use_for)
1151 use = iv_use (data, iv->use_id);
1153 gcc_assert (use->type == USE_NONLINEAR_EXPR
1154 || use->type == USE_OUTER);
1156 if (type == USE_NONLINEAR_EXPR)
1157 use->type = USE_NONLINEAR_EXPR;
1158 return use;
1161 if (zero_p (iv->step))
1163 record_invariant (data, op, true);
1164 return NULL;
1166 iv->have_use_for = true;
1168 civ = xmalloc (sizeof (struct iv));
1169 *civ = *iv;
1171 stmt = SSA_NAME_DEF_STMT (op);
1172 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1173 || TREE_CODE (stmt) == MODIFY_EXPR);
1175 use = record_use (data, NULL, civ, stmt, type);
1176 iv->use_id = use->id;
1178 return use;
1181 /* Checks whether the use OP is interesting and if so, records it. */
1183 static struct iv_use *
1184 find_interesting_uses_op (struct ivopts_data *data, tree op)
1186 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1189 /* Records a definition of induction variable OP that is used outside of the
1190 loop. */
1192 static struct iv_use *
1193 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1195 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1198 /* Checks whether the condition *COND_P in STMT is interesting
1199 and if so, records it. */
1201 static void
1202 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1204 tree *op0_p;
1205 tree *op1_p;
1206 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1207 struct iv const_iv;
1208 tree zero = integer_zero_node;
1210 const_iv.step = NULL_TREE;
1212 if (integer_zerop (*cond_p)
1213 || integer_nonzerop (*cond_p))
1214 return;
1216 if (TREE_CODE (*cond_p) == SSA_NAME)
1218 op0_p = cond_p;
1219 op1_p = &zero;
1221 else
1223 op0_p = &TREE_OPERAND (*cond_p, 0);
1224 op1_p = &TREE_OPERAND (*cond_p, 1);
1227 if (TREE_CODE (*op0_p) == SSA_NAME)
1228 iv0 = get_iv (data, *op0_p);
1229 else
1230 iv0 = &const_iv;
1232 if (TREE_CODE (*op1_p) == SSA_NAME)
1233 iv1 = get_iv (data, *op1_p);
1234 else
1235 iv1 = &const_iv;
1237 if (/* When comparing with non-invariant value, we may not do any senseful
1238 induction variable elimination. */
1239 (!iv0 || !iv1)
1240 /* Eliminating condition based on two ivs would be nontrivial.
1241 ??? TODO -- it is not really important to handle this case. */
1242 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1244 find_interesting_uses_op (data, *op0_p);
1245 find_interesting_uses_op (data, *op1_p);
1246 return;
1249 if (zero_p (iv0->step) && zero_p (iv1->step))
1251 /* If both are invariants, this is a work for unswitching. */
1252 return;
1255 civ = xmalloc (sizeof (struct iv));
1256 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1257 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1260 /* Returns true if expression EXPR is obviously invariant in LOOP,
1261 i.e. if all its operands are defined outside of the LOOP. */
1263 bool
1264 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1266 basic_block def_bb;
1267 unsigned i, len;
1269 if (is_gimple_min_invariant (expr))
1270 return true;
1272 if (TREE_CODE (expr) == SSA_NAME)
1274 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1275 if (def_bb
1276 && flow_bb_inside_loop_p (loop, def_bb))
1277 return false;
1279 return true;
1282 if (!EXPR_P (expr))
1283 return false;
1285 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1286 for (i = 0; i < len; i++)
1287 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1288 return false;
1290 return true;
1293 /* Cumulates the steps of indices into DATA and replaces their values with the
1294 initial ones. Returns false when the value of the index cannot be determined.
1295 Callback for for_each_index. */
1297 struct ifs_ivopts_data
1299 struct ivopts_data *ivopts_data;
1300 tree stmt;
1301 tree *step_p;
1304 static bool
1305 idx_find_step (tree base, tree *idx, void *data)
1307 struct ifs_ivopts_data *dta = data;
1308 struct iv *iv;
1309 tree step, type, iv_type, iv_step, lbound, off;
1310 struct loop *loop = dta->ivopts_data->current_loop;
1312 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1313 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1314 return false;
1316 /* If base is a component ref, require that the offset of the reference
1317 be invariant. */
1318 if (TREE_CODE (base) == COMPONENT_REF)
1320 off = component_ref_field_offset (base);
1321 return expr_invariant_in_loop_p (loop, off);
1324 /* If base is array, first check whether we will be able to move the
1325 reference out of the loop (in order to take its address in strength
1326 reduction). In order for this to work we need both lower bound
1327 and step to be loop invariants. */
1328 if (TREE_CODE (base) == ARRAY_REF)
1330 step = array_ref_element_size (base);
1331 lbound = array_ref_low_bound (base);
1333 if (!expr_invariant_in_loop_p (loop, step)
1334 || !expr_invariant_in_loop_p (loop, lbound))
1335 return false;
1338 if (TREE_CODE (*idx) != SSA_NAME)
1339 return true;
1341 iv = get_iv (dta->ivopts_data, *idx);
1342 if (!iv)
1343 return false;
1345 *idx = iv->base;
1347 if (!iv->step)
1348 return true;
1350 iv_type = TREE_TYPE (iv->base);
1351 type = build_pointer_type (TREE_TYPE (base));
1352 if (TREE_CODE (base) == ARRAY_REF)
1354 step = array_ref_element_size (base);
1356 /* We only handle addresses whose step is an integer constant. */
1357 if (TREE_CODE (step) != INTEGER_CST)
1358 return false;
1360 else
1361 /* The step for pointer arithmetics already is 1 byte. */
1362 step = build_int_cst (type, 1);
1364 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1365 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1366 type, iv->base, iv->step, dta->stmt);
1367 else
1368 iv_step = fold_convert (iv_type, iv->step);
1370 if (!iv_step)
1372 /* The index might wrap. */
1373 return false;
1376 step = fold_binary_to_constant (MULT_EXPR, type, step, iv_step);
1378 if (!*dta->step_p)
1379 *dta->step_p = step;
1380 else
1381 *dta->step_p = fold_binary_to_constant (PLUS_EXPR, type,
1382 *dta->step_p, step);
1384 return true;
1387 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1388 object is passed to it in DATA. */
1390 static bool
1391 idx_record_use (tree base, tree *idx,
1392 void *data)
1394 find_interesting_uses_op (data, *idx);
1395 if (TREE_CODE (base) == ARRAY_REF)
1397 find_interesting_uses_op (data, array_ref_element_size (base));
1398 find_interesting_uses_op (data, array_ref_low_bound (base));
1400 return true;
1403 /* Returns true if memory reference REF may be unaligned. */
1405 static bool
1406 may_be_unaligned_p (tree ref)
1408 tree base;
1409 tree base_type;
1410 HOST_WIDE_INT bitsize;
1411 HOST_WIDE_INT bitpos;
1412 tree toffset;
1413 enum machine_mode mode;
1414 int unsignedp, volatilep;
1415 unsigned base_align;
1417 /* The test below is basically copy of what expr.c:normal_inner_ref
1418 does to check whether the object must be loaded by parts when
1419 STRICT_ALIGNMENT is true. */
1420 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1421 &unsignedp, &volatilep, true);
1422 base_type = TREE_TYPE (base);
1423 base_align = TYPE_ALIGN (base_type);
1425 if (mode != BLKmode
1426 && (base_align < GET_MODE_ALIGNMENT (mode)
1427 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1428 || bitpos % BITS_PER_UNIT != 0))
1429 return true;
1431 return false;
1434 /* Finds addresses in *OP_P inside STMT. */
1436 static void
1437 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1439 tree base = unshare_expr (*op_p), step = NULL;
1440 struct iv *civ;
1441 struct ifs_ivopts_data ifs_ivopts_data;
1443 /* Ignore bitfields for now. Not really something terribly complicated
1444 to handle. TODO. */
1445 if (TREE_CODE (base) == COMPONENT_REF
1446 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1447 goto fail;
1449 if (STRICT_ALIGNMENT
1450 && may_be_unaligned_p (base))
1451 goto fail;
1453 ifs_ivopts_data.ivopts_data = data;
1454 ifs_ivopts_data.stmt = stmt;
1455 ifs_ivopts_data.step_p = &step;
1456 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1457 || zero_p (step))
1458 goto fail;
1460 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1461 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1463 if (TREE_CODE (base) == INDIRECT_REF)
1464 base = TREE_OPERAND (base, 0);
1465 else
1466 base = build_addr (base);
1468 civ = alloc_iv (base, step);
1469 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1470 return;
1472 fail:
1473 for_each_index (op_p, idx_record_use, data);
1476 /* Finds and records invariants used in STMT. */
1478 static void
1479 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1481 use_optype uses = NULL;
1482 unsigned i, n;
1483 tree op;
1485 if (TREE_CODE (stmt) == PHI_NODE)
1486 n = PHI_NUM_ARGS (stmt);
1487 else
1489 get_stmt_operands (stmt);
1490 uses = STMT_USE_OPS (stmt);
1491 n = NUM_USES (uses);
1494 for (i = 0; i < n; i++)
1496 if (TREE_CODE (stmt) == PHI_NODE)
1497 op = PHI_ARG_DEF (stmt, i);
1498 else
1499 op = USE_OP (uses, i);
1501 record_invariant (data, op, false);
1505 /* Finds interesting uses of induction variables in the statement STMT. */
1507 static void
1508 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1510 struct iv *iv;
1511 tree op, lhs, rhs;
1512 use_optype uses = NULL;
1513 unsigned i, n;
1515 find_invariants_stmt (data, stmt);
1517 if (TREE_CODE (stmt) == COND_EXPR)
1519 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1520 return;
1523 if (TREE_CODE (stmt) == MODIFY_EXPR)
1525 lhs = TREE_OPERAND (stmt, 0);
1526 rhs = TREE_OPERAND (stmt, 1);
1528 if (TREE_CODE (lhs) == SSA_NAME)
1530 /* If the statement defines an induction variable, the uses are not
1531 interesting by themselves. */
1533 iv = get_iv (data, lhs);
1535 if (iv && !zero_p (iv->step))
1536 return;
1539 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1541 case tcc_comparison:
1542 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1543 return;
1545 case tcc_reference:
1546 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1547 if (REFERENCE_CLASS_P (lhs))
1548 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1549 return;
1551 default: ;
1554 if (REFERENCE_CLASS_P (lhs)
1555 && is_gimple_val (rhs))
1557 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1558 find_interesting_uses_op (data, rhs);
1559 return;
1562 /* TODO -- we should also handle address uses of type
1564 memory = call (whatever);
1568 call (memory). */
1571 if (TREE_CODE (stmt) == PHI_NODE
1572 && bb_for_stmt (stmt) == data->current_loop->header)
1574 lhs = PHI_RESULT (stmt);
1575 iv = get_iv (data, lhs);
1577 if (iv && !zero_p (iv->step))
1578 return;
1581 if (TREE_CODE (stmt) == PHI_NODE)
1582 n = PHI_NUM_ARGS (stmt);
1583 else
1585 uses = STMT_USE_OPS (stmt);
1586 n = NUM_USES (uses);
1589 for (i = 0; i < n; i++)
1591 if (TREE_CODE (stmt) == PHI_NODE)
1592 op = PHI_ARG_DEF (stmt, i);
1593 else
1594 op = USE_OP (uses, i);
1596 if (TREE_CODE (op) != SSA_NAME)
1597 continue;
1599 iv = get_iv (data, op);
1600 if (!iv)
1601 continue;
1603 find_interesting_uses_op (data, op);
1607 /* Finds interesting uses of induction variables outside of loops
1608 on loop exit edge EXIT. */
1610 static void
1611 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1613 tree phi, def;
1615 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1617 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1618 find_interesting_uses_outer (data, def);
1622 /* Finds uses of the induction variables that are interesting. */
1624 static void
1625 find_interesting_uses (struct ivopts_data *data)
1627 basic_block bb;
1628 block_stmt_iterator bsi;
1629 tree phi;
1630 basic_block *body = get_loop_body (data->current_loop);
1631 unsigned i;
1632 struct version_info *info;
1633 edge e;
1635 if (dump_file && (dump_flags & TDF_DETAILS))
1636 fprintf (dump_file, "Uses:\n\n");
1638 for (i = 0; i < data->current_loop->num_nodes; i++)
1640 edge_iterator ei;
1641 bb = body[i];
1643 FOR_EACH_EDGE (e, ei, bb->succs)
1644 if (e->dest != EXIT_BLOCK_PTR
1645 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1646 find_interesting_uses_outside (data, e);
1648 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1649 find_interesting_uses_stmt (data, phi);
1650 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1651 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1654 if (dump_file && (dump_flags & TDF_DETAILS))
1656 bitmap_iterator bi;
1658 fprintf (dump_file, "\n");
1660 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1662 info = ver_info (data, i);
1663 if (info->inv_id)
1665 fprintf (dump_file, " ");
1666 print_generic_expr (dump_file, info->name, TDF_SLIM);
1667 fprintf (dump_file, " is invariant (%d)%s\n",
1668 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1672 fprintf (dump_file, "\n");
1675 free (body);
1678 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1679 position to POS. If USE is not NULL, the candidate is set as related to
1680 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1681 replacement of the final value of the iv by a direct computation. */
1683 static struct iv_cand *
1684 add_candidate_1 (struct ivopts_data *data,
1685 tree base, tree step, bool important, enum iv_position pos,
1686 struct iv_use *use, tree incremented_at)
1688 unsigned i;
1689 struct iv_cand *cand = NULL;
1690 tree type;
1692 if (base)
1694 type = TREE_TYPE (base);
1695 if (!TYPE_UNSIGNED (type))
1697 type = unsigned_type_for (type);
1698 base = fold_convert (type, base);
1699 if (step)
1700 step = fold_convert (type, step);
1704 for (i = 0; i < n_iv_cands (data); i++)
1706 cand = iv_cand (data, i);
1708 if (cand->pos != pos)
1709 continue;
1711 if (cand->incremented_at != incremented_at)
1712 continue;
1714 if (!cand->iv)
1716 if (!base && !step)
1717 break;
1719 continue;
1722 if (!base && !step)
1723 continue;
1725 if (!operand_equal_p (base, cand->iv->base, 0))
1726 continue;
1728 if (zero_p (cand->iv->step))
1730 if (zero_p (step))
1731 break;
1733 else
1735 if (step && operand_equal_p (step, cand->iv->step, 0))
1736 break;
1740 if (i == n_iv_cands (data))
1742 cand = xcalloc (1, sizeof (struct iv_cand));
1743 cand->id = i;
1745 if (!base && !step)
1746 cand->iv = NULL;
1747 else
1748 cand->iv = alloc_iv (base, step);
1750 cand->pos = pos;
1751 if (pos != IP_ORIGINAL && cand->iv)
1753 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1754 cand->var_after = cand->var_before;
1756 cand->important = important;
1757 cand->incremented_at = incremented_at;
1758 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1760 if (dump_file && (dump_flags & TDF_DETAILS))
1761 dump_cand (dump_file, cand);
1764 if (important && !cand->important)
1766 cand->important = true;
1767 if (dump_file && (dump_flags & TDF_DETAILS))
1768 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1771 if (use)
1773 bitmap_set_bit (use->related_cands, i);
1774 if (dump_file && (dump_flags & TDF_DETAILS))
1775 fprintf (dump_file, "Candidate %d is related to use %d\n",
1776 cand->id, use->id);
1779 return cand;
1782 /* Returns true if incrementing the induction variable at the end of the LOOP
1783 is allowed.
1785 The purpose is to avoid splitting latch edge with a biv increment, thus
1786 creating a jump, possibly confusing other optimization passes and leaving
1787 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
1788 is not available (so we do not have a better alternative), or if the latch
1789 edge is already nonempty. */
1791 static bool
1792 allow_ip_end_pos_p (struct loop *loop)
1794 if (!ip_normal_pos (loop))
1795 return true;
1797 if (!empty_block_p (ip_end_pos (loop)))
1798 return true;
1800 return false;
1803 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1804 position to POS. If USE is not NULL, the candidate is set as related to
1805 it. The candidate computation is scheduled on all available positions. */
1807 static void
1808 add_candidate (struct ivopts_data *data,
1809 tree base, tree step, bool important, struct iv_use *use)
1811 if (ip_normal_pos (data->current_loop))
1812 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
1813 if (ip_end_pos (data->current_loop)
1814 && allow_ip_end_pos_p (data->current_loop))
1815 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
1818 /* Adds standard iv candidates. */
1820 static void
1821 add_standard_iv_candidates (struct ivopts_data *data)
1823 /* Add 0 + 1 * iteration candidate. */
1824 add_candidate (data,
1825 build_int_cst (unsigned_intSI_type_node, 0),
1826 build_int_cst (unsigned_intSI_type_node, 1),
1827 true, NULL);
1829 /* The same for a long type if it is still fast enough. */
1830 if (BITS_PER_WORD > 32)
1831 add_candidate (data,
1832 build_int_cst (unsigned_intDI_type_node, 0),
1833 build_int_cst (unsigned_intDI_type_node, 1),
1834 true, NULL);
1838 /* Adds candidates bases on the old induction variable IV. */
1840 static void
1841 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
1843 tree phi, def;
1844 struct iv_cand *cand;
1846 add_candidate (data, iv->base, iv->step, true, NULL);
1848 /* The same, but with initial value zero. */
1849 add_candidate (data,
1850 build_int_cst (TREE_TYPE (iv->base), 0),
1851 iv->step, true, NULL);
1853 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
1854 if (TREE_CODE (phi) == PHI_NODE)
1856 /* Additionally record the possibility of leaving the original iv
1857 untouched. */
1858 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
1859 cand = add_candidate_1 (data,
1860 iv->base, iv->step, true, IP_ORIGINAL, NULL,
1861 SSA_NAME_DEF_STMT (def));
1862 cand->var_before = iv->ssa_name;
1863 cand->var_after = def;
1867 /* Adds candidates based on the old induction variables. */
1869 static void
1870 add_old_ivs_candidates (struct ivopts_data *data)
1872 unsigned i;
1873 struct iv *iv;
1874 bitmap_iterator bi;
1876 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1878 iv = ver_info (data, i)->iv;
1879 if (iv && iv->biv_p && !zero_p (iv->step))
1880 add_old_iv_candidates (data, iv);
1884 /* Adds candidates based on the value of the induction variable IV and USE. */
1886 static void
1887 add_iv_value_candidates (struct ivopts_data *data,
1888 struct iv *iv, struct iv_use *use)
1890 add_candidate (data, iv->base, iv->step, false, use);
1892 /* The same, but with initial value zero. */
1893 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
1894 iv->step, false, use);
1897 /* Adds candidates based on the address IV and USE. */
1899 static void
1900 add_address_candidates (struct ivopts_data *data,
1901 struct iv *iv, struct iv_use *use)
1903 tree base, abase, tmp, *act;
1905 /* First, the trivial choices. */
1906 add_iv_value_candidates (data, iv, use);
1908 /* Second, try removing the COMPONENT_REFs. */
1909 if (TREE_CODE (iv->base) == ADDR_EXPR)
1911 base = TREE_OPERAND (iv->base, 0);
1912 while (TREE_CODE (base) == COMPONENT_REF
1913 || (TREE_CODE (base) == ARRAY_REF
1914 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
1915 base = TREE_OPERAND (base, 0);
1917 if (base != TREE_OPERAND (iv->base, 0))
1919 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1920 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1922 if (TREE_CODE (base) == INDIRECT_REF)
1923 base = TREE_OPERAND (base, 0);
1924 else
1925 base = build_addr (base);
1926 add_candidate (data, base, iv->step, false, use);
1930 /* Third, try removing the constant offset. */
1931 abase = iv->base;
1932 while (TREE_CODE (abase) == PLUS_EXPR
1933 && TREE_CODE (TREE_OPERAND (abase, 1)) != INTEGER_CST)
1934 abase = TREE_OPERAND (abase, 0);
1935 /* We found the offset, so make the copy of the non-shared part and
1936 remove it. */
1937 if (TREE_CODE (abase) == PLUS_EXPR)
1939 tmp = iv->base;
1940 act = &base;
1942 for (tmp = iv->base; tmp != abase; tmp = TREE_OPERAND (tmp, 0))
1944 *act = build2 (PLUS_EXPR, TREE_TYPE (tmp),
1945 NULL_TREE, TREE_OPERAND (tmp, 1));
1946 act = &TREE_OPERAND (*act, 0);
1948 *act = TREE_OPERAND (tmp, 0);
1950 add_candidate (data, base, iv->step, false, use);
1954 /* Possibly adds pseudocandidate for replacing the final value of USE by
1955 a direct computation. */
1957 static void
1958 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
1960 struct tree_niter_desc *niter;
1961 struct loop *loop = data->current_loop;
1963 /* We must know where we exit the loop and how many times does it roll. */
1964 if (!single_dom_exit (loop))
1965 return;
1967 niter = &loop_data (loop)->niter;
1968 if (!niter->niter
1969 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
1970 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
1971 return;
1973 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
1976 /* Adds candidates based on the uses. */
1978 static void
1979 add_derived_ivs_candidates (struct ivopts_data *data)
1981 unsigned i;
1983 for (i = 0; i < n_iv_uses (data); i++)
1985 struct iv_use *use = iv_use (data, i);
1987 if (!use)
1988 continue;
1990 switch (use->type)
1992 case USE_NONLINEAR_EXPR:
1993 case USE_COMPARE:
1994 /* Just add the ivs based on the value of the iv used here. */
1995 add_iv_value_candidates (data, use->iv, use);
1996 break;
1998 case USE_OUTER:
1999 add_iv_value_candidates (data, use->iv, use);
2001 /* Additionally, add the pseudocandidate for the possibility to
2002 replace the final value by a direct computation. */
2003 add_iv_outer_candidates (data, use);
2004 break;
2006 case USE_ADDRESS:
2007 add_address_candidates (data, use->iv, use);
2008 break;
2010 default:
2011 gcc_unreachable ();
2016 /* Record important candidates and add them to related_cands bitmaps
2017 if needed. */
2019 static void
2020 record_important_candidates (struct ivopts_data *data)
2022 unsigned i;
2023 struct iv_use *use;
2025 for (i = 0; i < n_iv_cands (data); i++)
2027 struct iv_cand *cand = iv_cand (data, i);
2029 if (cand->important)
2030 bitmap_set_bit (data->important_candidates, i);
2033 data->consider_all_candidates = (n_iv_cands (data)
2034 <= CONSIDER_ALL_CANDIDATES_BOUND);
2036 if (data->consider_all_candidates)
2038 /* We will not need "related_cands" bitmaps in this case,
2039 so release them to decrease peak memory consumption. */
2040 for (i = 0; i < n_iv_uses (data); i++)
2042 use = iv_use (data, i);
2043 BITMAP_XFREE (use->related_cands);
2046 else
2048 /* Add important candidates to the related_cands bitmaps. */
2049 for (i = 0; i < n_iv_uses (data); i++)
2050 bitmap_ior_into (iv_use (data, i)->related_cands,
2051 data->important_candidates);
2055 /* Finds the candidates for the induction variables. */
2057 static void
2058 find_iv_candidates (struct ivopts_data *data)
2060 /* Add commonly used ivs. */
2061 add_standard_iv_candidates (data);
2063 /* Add old induction variables. */
2064 add_old_ivs_candidates (data);
2066 /* Add induction variables derived from uses. */
2067 add_derived_ivs_candidates (data);
2069 /* Record the important candidates. */
2070 record_important_candidates (data);
2073 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2074 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2075 we allocate a simple list to every use. */
2077 static void
2078 alloc_use_cost_map (struct ivopts_data *data)
2080 unsigned i, size, s, j;
2082 for (i = 0; i < n_iv_uses (data); i++)
2084 struct iv_use *use = iv_use (data, i);
2085 bitmap_iterator bi;
2087 if (data->consider_all_candidates)
2088 size = n_iv_cands (data);
2089 else
2091 s = 0;
2092 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2094 s++;
2097 /* Round up to the power of two, so that moduling by it is fast. */
2098 for (size = 1; size < s; size <<= 1)
2099 continue;
2102 use->n_map_members = size;
2103 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2107 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2108 on invariants DEPENDS_ON. */
2110 static void
2111 set_use_iv_cost (struct ivopts_data *data,
2112 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2113 bitmap depends_on)
2115 unsigned i, s;
2117 if (cost == INFTY)
2119 BITMAP_XFREE (depends_on);
2120 return;
2123 if (data->consider_all_candidates)
2125 use->cost_map[cand->id].cand = cand;
2126 use->cost_map[cand->id].cost = cost;
2127 use->cost_map[cand->id].depends_on = depends_on;
2128 return;
2131 /* n_map_members is a power of two, so this computes modulo. */
2132 s = cand->id & (use->n_map_members - 1);
2133 for (i = s; i < use->n_map_members; i++)
2134 if (!use->cost_map[i].cand)
2135 goto found;
2136 for (i = 0; i < s; i++)
2137 if (!use->cost_map[i].cand)
2138 goto found;
2140 gcc_unreachable ();
2142 found:
2143 use->cost_map[i].cand = cand;
2144 use->cost_map[i].cost = cost;
2145 use->cost_map[i].depends_on = depends_on;
2148 /* Gets cost of (USE, CANDIDATE) pair. */
2150 static struct cost_pair *
2151 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2152 struct iv_cand *cand)
2154 unsigned i, s;
2155 struct cost_pair *ret;
2157 if (!cand)
2158 return NULL;
2160 if (data->consider_all_candidates)
2162 ret = use->cost_map + cand->id;
2163 if (!ret->cand)
2164 return NULL;
2166 return ret;
2169 /* n_map_members is a power of two, so this computes modulo. */
2170 s = cand->id & (use->n_map_members - 1);
2171 for (i = s; i < use->n_map_members; i++)
2172 if (use->cost_map[i].cand == cand)
2173 return use->cost_map + i;
2175 for (i = 0; i < s; i++)
2176 if (use->cost_map[i].cand == cand)
2177 return use->cost_map + i;
2179 return NULL;
2182 /* Returns estimate on cost of computing SEQ. */
2184 static unsigned
2185 seq_cost (rtx seq)
2187 unsigned cost = 0;
2188 rtx set;
2190 for (; seq; seq = NEXT_INSN (seq))
2192 set = single_set (seq);
2193 if (set)
2194 cost += rtx_cost (set, SET);
2195 else
2196 cost++;
2199 return cost;
2202 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2203 static rtx
2204 produce_memory_decl_rtl (tree obj, int *regno)
2206 rtx x;
2207 if (!obj)
2208 abort ();
2209 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2211 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2212 x = gen_rtx_SYMBOL_REF (Pmode, name);
2214 else
2215 x = gen_raw_REG (Pmode, (*regno)++);
2217 return gen_rtx_MEM (DECL_MODE (obj), x);
2220 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2221 walk_tree. DATA contains the actual fake register number. */
2223 static tree
2224 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2226 tree obj = NULL_TREE;
2227 rtx x = NULL_RTX;
2228 int *regno = data;
2230 switch (TREE_CODE (*expr_p))
2232 case ADDR_EXPR:
2233 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2234 handled_component_p (*expr_p);
2235 expr_p = &TREE_OPERAND (*expr_p, 0))
2236 continue;
2237 obj = *expr_p;
2238 if (DECL_P (obj))
2239 x = produce_memory_decl_rtl (obj, regno);
2240 break;
2242 case SSA_NAME:
2243 *ws = 0;
2244 obj = SSA_NAME_VAR (*expr_p);
2245 if (!DECL_RTL_SET_P (obj))
2246 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2247 break;
2249 case VAR_DECL:
2250 case PARM_DECL:
2251 case RESULT_DECL:
2252 *ws = 0;
2253 obj = *expr_p;
2255 if (DECL_RTL_SET_P (obj))
2256 break;
2258 if (DECL_MODE (obj) == BLKmode)
2259 x = produce_memory_decl_rtl (obj, regno);
2260 else
2261 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2263 break;
2265 default:
2266 break;
2269 if (x)
2271 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
2272 SET_DECL_RTL (obj, x);
2275 return NULL_TREE;
2278 /* Determines cost of the computation of EXPR. */
2280 static unsigned
2281 computation_cost (tree expr)
2283 rtx seq, rslt;
2284 tree type = TREE_TYPE (expr);
2285 unsigned cost;
2286 int regno = 0;
2288 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2289 start_sequence ();
2290 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2291 seq = get_insns ();
2292 end_sequence ();
2294 cost = seq_cost (seq);
2295 if (GET_CODE (rslt) == MEM)
2296 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2298 return cost;
2301 /* Returns variable containing the value of candidate CAND at statement AT. */
2303 static tree
2304 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2306 if (stmt_after_increment (loop, cand, stmt))
2307 return cand->var_after;
2308 else
2309 return cand->var_before;
2312 /* Determines the expression by that USE is expressed from induction variable
2313 CAND at statement AT in LOOP. */
2315 static tree
2316 get_computation_at (struct loop *loop,
2317 struct iv_use *use, struct iv_cand *cand, tree at)
2319 tree ubase = use->iv->base;
2320 tree ustep = use->iv->step;
2321 tree cbase = cand->iv->base;
2322 tree cstep = cand->iv->step;
2323 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2324 tree uutype;
2325 tree expr, delta;
2326 tree ratio;
2327 unsigned HOST_WIDE_INT ustepi, cstepi;
2328 HOST_WIDE_INT ratioi;
2330 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2332 /* We do not have a precision to express the values of use. */
2333 return NULL_TREE;
2336 expr = var_at_stmt (loop, cand, at);
2338 if (TREE_TYPE (expr) != ctype)
2340 /* This may happen with the original ivs. */
2341 expr = fold_convert (ctype, expr);
2344 if (TYPE_UNSIGNED (utype))
2345 uutype = utype;
2346 else
2348 uutype = unsigned_type_for (utype);
2349 ubase = fold_convert (uutype, ubase);
2350 ustep = fold_convert (uutype, ustep);
2353 if (uutype != ctype)
2355 expr = fold_convert (uutype, expr);
2356 cbase = fold_convert (uutype, cbase);
2357 cstep = fold_convert (uutype, cstep);
2360 if (!cst_and_fits_in_hwi (cstep)
2361 || !cst_and_fits_in_hwi (ustep))
2362 return NULL_TREE;
2364 ustepi = int_cst_value (ustep);
2365 cstepi = int_cst_value (cstep);
2367 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2369 /* TODO maybe consider case when ustep divides cstep and the ratio is
2370 a power of 2 (so that the division is fast to execute)? We would
2371 need to be much more careful with overflows etc. then. */
2372 return NULL_TREE;
2375 /* We may need to shift the value if we are after the increment. */
2376 if (stmt_after_increment (loop, cand, at))
2377 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2379 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2380 or |ratio| == 1, it is better to handle this like
2382 ubase - ratio * cbase + ratio * var. */
2384 if (ratioi == 1)
2386 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2387 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2389 else if (ratioi == -1)
2391 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2392 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2394 else if (TREE_CODE (cbase) == INTEGER_CST)
2396 ratio = build_int_cst_type (uutype, ratioi);
2397 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2398 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2399 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2400 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2402 else
2404 expr = fold (build2 (MINUS_EXPR, uutype, expr, cbase));
2405 ratio = build_int_cst_type (uutype, ratioi);
2406 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2407 expr = fold (build2 (PLUS_EXPR, uutype, ubase, expr));
2410 return fold_convert (utype, expr);
2413 /* Determines the expression by that USE is expressed from induction variable
2414 CAND in LOOP. */
2416 static tree
2417 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2419 return get_computation_at (loop, use, cand, use->stmt);
2422 /* Strips constant offsets from EXPR and adds them to OFFSET. */
2424 static void
2425 strip_offset (tree *expr, unsigned HOST_WIDE_INT *offset)
2427 tree op0, op1;
2428 enum tree_code code;
2430 while (1)
2432 if (cst_and_fits_in_hwi (*expr))
2434 *offset += int_cst_value (*expr);
2435 *expr = integer_zero_node;
2436 return;
2439 code = TREE_CODE (*expr);
2441 if (code != PLUS_EXPR && code != MINUS_EXPR)
2442 return;
2444 op0 = TREE_OPERAND (*expr, 0);
2445 op1 = TREE_OPERAND (*expr, 1);
2447 if (cst_and_fits_in_hwi (op1))
2449 if (code == PLUS_EXPR)
2450 *offset += int_cst_value (op1);
2451 else
2452 *offset -= int_cst_value (op1);
2454 *expr = op0;
2455 continue;
2458 if (code != PLUS_EXPR)
2459 return;
2461 if (!cst_and_fits_in_hwi (op0))
2462 return;
2464 *offset += int_cst_value (op0);
2465 *expr = op1;
2469 /* Returns cost of addition in MODE. */
2471 static unsigned
2472 add_cost (enum machine_mode mode)
2474 static unsigned costs[NUM_MACHINE_MODES];
2475 rtx seq;
2476 unsigned cost;
2478 if (costs[mode])
2479 return costs[mode];
2481 start_sequence ();
2482 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2483 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2484 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2485 NULL_RTX);
2486 seq = get_insns ();
2487 end_sequence ();
2489 cost = seq_cost (seq);
2490 if (!cost)
2491 cost = 1;
2493 costs[mode] = cost;
2495 if (dump_file && (dump_flags & TDF_DETAILS))
2496 fprintf (dump_file, "Addition in %s costs %d\n",
2497 GET_MODE_NAME (mode), cost);
2498 return cost;
2501 /* Entry in a hashtable of already known costs for multiplication. */
2502 struct mbc_entry
2504 HOST_WIDE_INT cst; /* The constant to multiply by. */
2505 enum machine_mode mode; /* In mode. */
2506 unsigned cost; /* The cost. */
2509 /* Counts hash value for the ENTRY. */
2511 static hashval_t
2512 mbc_entry_hash (const void *entry)
2514 const struct mbc_entry *e = entry;
2516 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2519 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2521 static int
2522 mbc_entry_eq (const void *entry1, const void *entry2)
2524 const struct mbc_entry *e1 = entry1;
2525 const struct mbc_entry *e2 = entry2;
2527 return (e1->mode == e2->mode
2528 && e1->cst == e2->cst);
2531 /* Returns cost of multiplication by constant CST in MODE. */
2533 static unsigned
2534 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2536 static htab_t costs;
2537 struct mbc_entry **cached, act;
2538 rtx seq;
2539 unsigned cost;
2541 if (!costs)
2542 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2544 act.mode = mode;
2545 act.cst = cst;
2546 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2547 if (*cached)
2548 return (*cached)->cost;
2550 *cached = xmalloc (sizeof (struct mbc_entry));
2551 (*cached)->mode = mode;
2552 (*cached)->cst = cst;
2554 start_sequence ();
2555 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2556 NULL_RTX, 0);
2557 seq = get_insns ();
2558 end_sequence ();
2560 cost = seq_cost (seq);
2562 if (dump_file && (dump_flags & TDF_DETAILS))
2563 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2564 (int) cst, GET_MODE_NAME (mode), cost);
2566 (*cached)->cost = cost;
2568 return cost;
2571 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2572 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2573 variable is omitted. The created memory accesses MODE.
2575 TODO -- there must be some better way. This all is quite crude. */
2577 static unsigned
2578 get_address_cost (bool symbol_present, bool var_present,
2579 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2581 #define MAX_RATIO 128
2582 static sbitmap valid_mult;
2583 static HOST_WIDE_INT rat, off;
2584 static HOST_WIDE_INT min_offset, max_offset;
2585 static unsigned costs[2][2][2][2];
2586 unsigned cost, acost;
2587 rtx seq, addr, base;
2588 bool offset_p, ratio_p;
2589 rtx reg1;
2590 HOST_WIDE_INT s_offset;
2591 unsigned HOST_WIDE_INT mask;
2592 unsigned bits;
2594 if (!valid_mult)
2596 HOST_WIDE_INT i;
2598 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2600 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2601 for (i = 1; i <= 1 << 20; i <<= 1)
2603 XEXP (addr, 1) = GEN_INT (i);
2604 if (!memory_address_p (Pmode, addr))
2605 break;
2607 max_offset = i >> 1;
2608 off = max_offset;
2610 for (i = 1; i <= 1 << 20; i <<= 1)
2612 XEXP (addr, 1) = GEN_INT (-i);
2613 if (!memory_address_p (Pmode, addr))
2614 break;
2616 min_offset = -(i >> 1);
2618 if (dump_file && (dump_flags & TDF_DETAILS))
2620 fprintf (dump_file, "get_address_cost:\n");
2621 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2622 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2625 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2626 sbitmap_zero (valid_mult);
2627 rat = 1;
2628 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2629 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2631 XEXP (addr, 1) = GEN_INT (i);
2632 if (memory_address_p (Pmode, addr))
2634 SET_BIT (valid_mult, i + MAX_RATIO);
2635 rat = i;
2639 if (dump_file && (dump_flags & TDF_DETAILS))
2641 fprintf (dump_file, " allowed multipliers:");
2642 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2643 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2644 fprintf (dump_file, " %d", (int) i);
2645 fprintf (dump_file, "\n");
2646 fprintf (dump_file, "\n");
2650 bits = GET_MODE_BITSIZE (Pmode);
2651 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2652 offset &= mask;
2653 if ((offset >> (bits - 1) & 1))
2654 offset |= ~mask;
2655 s_offset = offset;
2657 cost = 0;
2658 offset_p = (s_offset != 0
2659 && min_offset <= s_offset && s_offset <= max_offset);
2660 ratio_p = (ratio != 1
2661 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2662 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2664 if (ratio != 1 && !ratio_p)
2665 cost += multiply_by_cost (ratio, Pmode);
2667 if (s_offset && !offset_p && !symbol_present)
2669 cost += add_cost (Pmode);
2670 var_present = true;
2673 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2674 if (!acost)
2676 acost = 0;
2678 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2679 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2680 if (ratio_p)
2681 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2683 if (var_present)
2684 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2686 if (symbol_present)
2688 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2689 if (offset_p)
2690 base = gen_rtx_fmt_e (CONST, Pmode,
2691 gen_rtx_fmt_ee (PLUS, Pmode,
2692 base,
2693 GEN_INT (off)));
2695 else if (offset_p)
2696 base = GEN_INT (off);
2697 else
2698 base = NULL_RTX;
2700 if (base)
2701 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2703 start_sequence ();
2704 addr = memory_address (Pmode, addr);
2705 seq = get_insns ();
2706 end_sequence ();
2708 acost = seq_cost (seq);
2709 acost += address_cost (addr, Pmode);
2711 if (!acost)
2712 acost = 1;
2713 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2716 return cost + acost;
2719 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2720 the bitmap to that we should store it. */
2722 static struct ivopts_data *fd_ivopts_data;
2723 static tree
2724 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2726 bitmap *depends_on = data;
2727 struct version_info *info;
2729 if (TREE_CODE (*expr_p) != SSA_NAME)
2730 return NULL_TREE;
2731 info = name_info (fd_ivopts_data, *expr_p);
2733 if (!info->inv_id || info->has_nonlin_use)
2734 return NULL_TREE;
2736 if (!*depends_on)
2737 *depends_on = BITMAP_XMALLOC ();
2738 bitmap_set_bit (*depends_on, info->inv_id);
2740 return NULL_TREE;
2743 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
2744 invariants the computation depends on. */
2746 static unsigned
2747 force_var_cost (struct ivopts_data *data,
2748 tree expr, bitmap *depends_on)
2750 static bool costs_initialized = false;
2751 static unsigned integer_cost;
2752 static unsigned symbol_cost;
2753 static unsigned address_cost;
2754 tree op0, op1;
2755 unsigned cost0, cost1, cost;
2756 enum machine_mode mode;
2758 if (!costs_initialized)
2760 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2761 rtx x = gen_rtx_MEM (DECL_MODE (var),
2762 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2763 tree addr;
2764 tree type = build_pointer_type (integer_type_node);
2766 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2767 2000));
2769 SET_DECL_RTL (var, x);
2770 TREE_STATIC (var) = 1;
2771 addr = build1 (ADDR_EXPR, type, var);
2772 symbol_cost = computation_cost (addr) + 1;
2774 address_cost
2775 = computation_cost (build2 (PLUS_EXPR, type,
2776 addr,
2777 build_int_cst_type (type, 2000))) + 1;
2778 if (dump_file && (dump_flags & TDF_DETAILS))
2780 fprintf (dump_file, "force_var_cost:\n");
2781 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2782 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2783 fprintf (dump_file, " address %d\n", (int) address_cost);
2784 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2785 fprintf (dump_file, "\n");
2788 costs_initialized = true;
2791 if (depends_on)
2793 fd_ivopts_data = data;
2794 walk_tree (&expr, find_depends, depends_on, NULL);
2797 if (SSA_VAR_P (expr))
2798 return 0;
2800 if (TREE_INVARIANT (expr))
2802 if (TREE_CODE (expr) == INTEGER_CST)
2803 return integer_cost;
2805 if (TREE_CODE (expr) == ADDR_EXPR)
2807 tree obj = TREE_OPERAND (expr, 0);
2809 if (TREE_CODE (obj) == VAR_DECL
2810 || TREE_CODE (obj) == PARM_DECL
2811 || TREE_CODE (obj) == RESULT_DECL)
2812 return symbol_cost;
2815 return address_cost;
2818 switch (TREE_CODE (expr))
2820 case PLUS_EXPR:
2821 case MINUS_EXPR:
2822 case MULT_EXPR:
2823 op0 = TREE_OPERAND (expr, 0);
2824 op1 = TREE_OPERAND (expr, 1);
2826 if (is_gimple_val (op0))
2827 cost0 = 0;
2828 else
2829 cost0 = force_var_cost (data, op0, NULL);
2831 if (is_gimple_val (op1))
2832 cost1 = 0;
2833 else
2834 cost1 = force_var_cost (data, op1, NULL);
2836 break;
2838 default:
2839 /* Just an arbitrary value, FIXME. */
2840 return target_spill_cost;
2843 mode = TYPE_MODE (TREE_TYPE (expr));
2844 switch (TREE_CODE (expr))
2846 case PLUS_EXPR:
2847 case MINUS_EXPR:
2848 cost = add_cost (mode);
2849 break;
2851 case MULT_EXPR:
2852 if (cst_and_fits_in_hwi (op0))
2853 cost = multiply_by_cost (int_cst_value (op0), mode);
2854 else if (cst_and_fits_in_hwi (op1))
2855 cost = multiply_by_cost (int_cst_value (op1), mode);
2856 else
2857 return target_spill_cost;
2858 break;
2860 default:
2861 gcc_unreachable ();
2864 cost += cost0;
2865 cost += cost1;
2867 /* Bound the cost by target_spill_cost. The parts of complicated
2868 computations often are either loop invariant or at least can
2869 be shared between several iv uses, so letting this grow without
2870 limits would not give reasonable results. */
2871 return cost < target_spill_cost ? cost : target_spill_cost;
2874 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2875 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2876 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2877 invariants the computation depends on. */
2879 static unsigned
2880 split_address_cost (struct ivopts_data *data,
2881 tree addr, bool *symbol_present, bool *var_present,
2882 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2884 tree core;
2885 HOST_WIDE_INT bitsize;
2886 HOST_WIDE_INT bitpos;
2887 tree toffset;
2888 enum machine_mode mode;
2889 int unsignedp, volatilep;
2891 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
2892 &unsignedp, &volatilep, false);
2894 if (toffset != 0
2895 || bitpos % BITS_PER_UNIT != 0
2896 || TREE_CODE (core) != VAR_DECL)
2898 *symbol_present = false;
2899 *var_present = true;
2900 fd_ivopts_data = data;
2901 walk_tree (&addr, find_depends, depends_on, NULL);
2902 return target_spill_cost;
2905 *offset += bitpos / BITS_PER_UNIT;
2906 if (TREE_STATIC (core)
2907 || DECL_EXTERNAL (core))
2909 *symbol_present = true;
2910 *var_present = false;
2911 return 0;
2914 *symbol_present = false;
2915 *var_present = true;
2916 return 0;
2919 /* Estimates cost of expressing difference of addresses E1 - E2 as
2920 var + symbol + offset. The value of offset is added to OFFSET,
2921 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2922 part is missing. DEPENDS_ON is a set of the invariants the computation
2923 depends on. */
2925 static unsigned
2926 ptr_difference_cost (struct ivopts_data *data,
2927 tree e1, tree e2, bool *symbol_present, bool *var_present,
2928 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2930 HOST_WIDE_INT diff = 0;
2931 unsigned cost;
2933 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
2935 if (ptr_difference_const (e1, e2, &diff))
2937 *offset += diff;
2938 *symbol_present = false;
2939 *var_present = false;
2940 return 0;
2943 if (e2 == integer_zero_node)
2944 return split_address_cost (data, TREE_OPERAND (e1, 0),
2945 symbol_present, var_present, offset, depends_on);
2947 *symbol_present = false;
2948 *var_present = true;
2950 cost = force_var_cost (data, e1, depends_on);
2951 cost += force_var_cost (data, e2, depends_on);
2952 cost += add_cost (Pmode);
2954 return cost;
2957 /* Estimates cost of expressing difference E1 - E2 as
2958 var + symbol + offset. The value of offset is added to OFFSET,
2959 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2960 part is missing. DEPENDS_ON is a set of the invariants the computation
2961 depends on. */
2963 static unsigned
2964 difference_cost (struct ivopts_data *data,
2965 tree e1, tree e2, bool *symbol_present, bool *var_present,
2966 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2968 unsigned cost;
2969 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
2971 strip_offset (&e1, offset);
2972 *offset = -*offset;
2973 strip_offset (&e2, offset);
2974 *offset = -*offset;
2976 if (TREE_CODE (e1) == ADDR_EXPR)
2977 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
2978 depends_on);
2979 *symbol_present = false;
2981 if (operand_equal_p (e1, e2, 0))
2983 *var_present = false;
2984 return 0;
2986 *var_present = true;
2987 if (zero_p (e2))
2988 return force_var_cost (data, e1, depends_on);
2990 if (zero_p (e1))
2992 cost = force_var_cost (data, e2, depends_on);
2993 cost += multiply_by_cost (-1, mode);
2995 return cost;
2998 cost = force_var_cost (data, e1, depends_on);
2999 cost += force_var_cost (data, e2, depends_on);
3000 cost += add_cost (mode);
3002 return cost;
3005 /* Determines the cost of the computation by that USE is expressed
3006 from induction variable CAND. If ADDRESS_P is true, we just need
3007 to create an address from it, otherwise we want to get it into
3008 register. A set of invariants we depend on is stored in
3009 DEPENDS_ON. AT is the statement at that the value is computed. */
3011 static unsigned
3012 get_computation_cost_at (struct ivopts_data *data,
3013 struct iv_use *use, struct iv_cand *cand,
3014 bool address_p, bitmap *depends_on, tree at)
3016 tree ubase = use->iv->base, ustep = use->iv->step;
3017 tree cbase, cstep;
3018 tree utype = TREE_TYPE (ubase), ctype;
3019 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3020 HOST_WIDE_INT ratio, aratio;
3021 bool var_present, symbol_present;
3022 unsigned cost = 0, n_sums;
3024 *depends_on = NULL;
3026 /* Only consider real candidates. */
3027 if (!cand->iv)
3028 return INFTY;
3030 cbase = cand->iv->base;
3031 cstep = cand->iv->step;
3032 ctype = TREE_TYPE (cbase);
3034 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3036 /* We do not have a precision to express the values of use. */
3037 return INFTY;
3040 if (address_p)
3042 /* Do not try to express address of an object with computation based
3043 on address of a different object. This may cause problems in rtl
3044 level alias analysis (that does not expect this to be happening,
3045 as this is illegal in C), and would be unlikely to be useful
3046 anyway. */
3047 if (use->iv->base_object
3048 && cand->iv->base_object
3049 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3050 return INFTY;
3053 if (!cst_and_fits_in_hwi (ustep)
3054 || !cst_and_fits_in_hwi (cstep))
3055 return INFTY;
3057 if (TREE_CODE (ubase) == INTEGER_CST
3058 && !cst_and_fits_in_hwi (ubase))
3059 goto fallback;
3061 if (TREE_CODE (cbase) == INTEGER_CST
3062 && !cst_and_fits_in_hwi (cbase))
3063 goto fallback;
3065 ustepi = int_cst_value (ustep);
3066 cstepi = int_cst_value (cstep);
3068 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3070 /* TODO -- add direct handling of this case. */
3071 goto fallback;
3074 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3075 return INFTY;
3077 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3078 or ratio == 1, it is better to handle this like
3080 ubase - ratio * cbase + ratio * var
3082 (also holds in the case ratio == -1, TODO. */
3084 if (TREE_CODE (cbase) == INTEGER_CST)
3086 offset = - ratio * int_cst_value (cbase);
3087 cost += difference_cost (data,
3088 ubase, integer_zero_node,
3089 &symbol_present, &var_present, &offset,
3090 depends_on);
3092 else if (ratio == 1)
3094 cost += difference_cost (data,
3095 ubase, cbase,
3096 &symbol_present, &var_present, &offset,
3097 depends_on);
3099 else
3101 cost += force_var_cost (data, cbase, depends_on);
3102 cost += add_cost (TYPE_MODE (ctype));
3103 cost += difference_cost (data,
3104 ubase, integer_zero_node,
3105 &symbol_present, &var_present, &offset,
3106 depends_on);
3109 /* If we are after the increment, the value of the candidate is higher by
3110 one iteration. */
3111 if (stmt_after_increment (data->current_loop, cand, at))
3112 offset -= ratio * cstepi;
3114 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3115 (symbol/var/const parts may be omitted). If we are looking for an address,
3116 find the cost of addressing this. */
3117 if (address_p)
3118 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3120 /* Otherwise estimate the costs for computing the expression. */
3121 aratio = ratio > 0 ? ratio : -ratio;
3122 if (!symbol_present && !var_present && !offset)
3124 if (ratio != 1)
3125 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3127 return cost;
3130 if (aratio != 1)
3131 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3133 n_sums = 1;
3134 if (var_present
3135 /* Symbol + offset should be compile-time computable. */
3136 && (symbol_present || offset))
3137 n_sums++;
3139 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3141 fallback:
3143 /* Just get the expression, expand it and measure the cost. */
3144 tree comp = get_computation_at (data->current_loop, use, cand, at);
3146 if (!comp)
3147 return INFTY;
3149 if (address_p)
3150 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3152 return computation_cost (comp);
3156 /* Determines the cost of the computation by that USE is expressed
3157 from induction variable CAND. If ADDRESS_P is true, we just need
3158 to create an address from it, otherwise we want to get it into
3159 register. A set of invariants we depend on is stored in
3160 DEPENDS_ON. */
3162 static unsigned
3163 get_computation_cost (struct ivopts_data *data,
3164 struct iv_use *use, struct iv_cand *cand,
3165 bool address_p, bitmap *depends_on)
3167 return get_computation_cost_at (data,
3168 use, cand, address_p, depends_on, use->stmt);
3171 /* Determines cost of basing replacement of USE on CAND in a generic
3172 expression. */
3174 static bool
3175 determine_use_iv_cost_generic (struct ivopts_data *data,
3176 struct iv_use *use, struct iv_cand *cand)
3178 bitmap depends_on;
3179 unsigned cost;
3181 /* The simple case first -- if we need to express value of the preserved
3182 original biv, the cost is 0. This also prevents us from counting the
3183 cost of increment twice -- once at this use and once in the cost of
3184 the candidate. */
3185 if (cand->pos == IP_ORIGINAL
3186 && cand->incremented_at == use->stmt)
3188 set_use_iv_cost (data, use, cand, 0, NULL);
3189 return true;
3192 cost = get_computation_cost (data, use, cand, false, &depends_on);
3193 set_use_iv_cost (data, use, cand, cost, depends_on);
3195 return cost != INFTY;
3198 /* Determines cost of basing replacement of USE on CAND in an address. */
3200 static bool
3201 determine_use_iv_cost_address (struct ivopts_data *data,
3202 struct iv_use *use, struct iv_cand *cand)
3204 bitmap depends_on;
3205 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3207 set_use_iv_cost (data, use, cand, cost, depends_on);
3209 return cost != INFTY;
3212 /* Computes value of induction variable IV in iteration NITER. */
3214 static tree
3215 iv_value (struct iv *iv, tree niter)
3217 tree val;
3218 tree type = TREE_TYPE (iv->base);
3220 niter = fold_convert (type, niter);
3221 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3223 return fold (build2 (PLUS_EXPR, type, iv->base, val));
3226 /* Computes value of candidate CAND at position AT in iteration NITER. */
3228 static tree
3229 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3231 tree val = iv_value (cand->iv, niter);
3232 tree type = TREE_TYPE (cand->iv->base);
3234 if (stmt_after_increment (loop, cand, at))
3235 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3237 return val;
3240 /* Check whether it is possible to express the condition in USE by comparison
3241 of candidate CAND. If so, store the comparison code to COMPARE and the
3242 value compared with to BOUND. */
3244 static bool
3245 may_eliminate_iv (struct loop *loop,
3246 struct iv_use *use, struct iv_cand *cand,
3247 enum tree_code *compare, tree *bound)
3249 basic_block ex_bb;
3250 edge exit;
3251 struct tree_niter_desc niter, new_niter;
3252 tree wider_type, type, base;
3254 /* For now works only for exits that dominate the loop latch. TODO -- extend
3255 for other conditions inside loop body. */
3256 ex_bb = bb_for_stmt (use->stmt);
3257 if (use->stmt != last_stmt (ex_bb)
3258 || TREE_CODE (use->stmt) != COND_EXPR)
3259 return false;
3260 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3261 return false;
3263 exit = EDGE_SUCC (ex_bb, 0);
3264 if (flow_bb_inside_loop_p (loop, exit->dest))
3265 exit = EDGE_SUCC (ex_bb, 1);
3266 if (flow_bb_inside_loop_p (loop, exit->dest))
3267 return false;
3269 niter.niter = NULL_TREE;
3270 number_of_iterations_exit (loop, exit, &niter);
3271 if (!niter.niter
3272 || !integer_nonzerop (niter.assumptions)
3273 || !integer_zerop (niter.may_be_zero))
3274 return false;
3276 if (exit->flags & EDGE_TRUE_VALUE)
3277 *compare = EQ_EXPR;
3278 else
3279 *compare = NE_EXPR;
3281 *bound = cand_value_at (loop, cand, use->stmt, niter.niter);
3283 /* Let us check there is not some problem with overflows, by checking that
3284 the number of iterations is unchanged. */
3285 base = cand->iv->base;
3286 type = TREE_TYPE (base);
3287 if (stmt_after_increment (loop, cand, use->stmt))
3288 base = fold (build2 (PLUS_EXPR, type, base, cand->iv->step));
3290 new_niter.niter = NULL_TREE;
3291 number_of_iterations_cond (TREE_TYPE (cand->iv->base), base,
3292 cand->iv->step, NE_EXPR, *bound, NULL_TREE,
3293 &new_niter);
3294 if (!new_niter.niter
3295 || !integer_nonzerop (new_niter.assumptions)
3296 || !integer_zerop (new_niter.may_be_zero))
3297 return false;
3299 wider_type = TREE_TYPE (new_niter.niter);
3300 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (TREE_TYPE (niter.niter)))
3301 wider_type = TREE_TYPE (niter.niter);
3302 if (!operand_equal_p (fold_convert (wider_type, niter.niter),
3303 fold_convert (wider_type, new_niter.niter), 0))
3304 return false;
3306 return true;
3309 /* Determines cost of basing replacement of USE on CAND in a condition. */
3311 static bool
3312 determine_use_iv_cost_condition (struct ivopts_data *data,
3313 struct iv_use *use, struct iv_cand *cand)
3315 tree bound;
3316 enum tree_code compare;
3318 /* Only consider real candidates. */
3319 if (!cand->iv)
3321 set_use_iv_cost (data, use, cand, INFTY, NULL);
3322 return false;
3325 if (may_eliminate_iv (data->current_loop, use, cand, &compare, &bound))
3327 bitmap depends_on = NULL;
3328 unsigned cost = force_var_cost (data, bound, &depends_on);
3330 set_use_iv_cost (data, use, cand, cost, depends_on);
3331 return cost != INFTY;
3334 /* The induction variable elimination failed; just express the original
3335 giv. If it is compared with an invariant, note that we cannot get
3336 rid of it. */
3337 if (TREE_CODE (*use->op_p) == SSA_NAME)
3338 record_invariant (data, *use->op_p, true);
3339 else
3341 record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
3342 record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
3345 return determine_use_iv_cost_generic (data, use, cand);
3348 /* Checks whether it is possible to replace the final value of USE by
3349 a direct computation. If so, the formula is stored to *VALUE. */
3351 static bool
3352 may_replace_final_value (struct loop *loop, struct iv_use *use, tree *value)
3354 edge exit;
3355 struct tree_niter_desc *niter;
3357 exit = single_dom_exit (loop);
3358 if (!exit)
3359 return false;
3361 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3362 bb_for_stmt (use->stmt)));
3364 niter = &loop_data (loop)->niter;
3365 if (!niter->niter
3366 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
3367 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
3368 return false;
3370 *value = iv_value (use->iv, niter->niter);
3372 return true;
3375 /* Determines cost of replacing final value of USE using CAND. */
3377 static bool
3378 determine_use_iv_cost_outer (struct ivopts_data *data,
3379 struct iv_use *use, struct iv_cand *cand)
3381 bitmap depends_on;
3382 unsigned cost;
3383 edge exit;
3384 tree value;
3385 struct loop *loop = data->current_loop;
3387 /* The simple case first -- if we need to express value of the preserved
3388 original biv, the cost is 0. This also prevents us from counting the
3389 cost of increment twice -- once at this use and once in the cost of
3390 the candidate. */
3391 if (cand->pos == IP_ORIGINAL
3392 && cand->incremented_at == use->stmt)
3394 set_use_iv_cost (data, use, cand, 0, NULL);
3395 return true;
3398 if (!cand->iv)
3400 if (!may_replace_final_value (loop, use, &value))
3402 set_use_iv_cost (data, use, cand, INFTY, NULL);
3403 return false;
3406 depends_on = NULL;
3407 cost = force_var_cost (data, value, &depends_on);
3409 cost /= AVG_LOOP_NITER (loop);
3411 set_use_iv_cost (data, use, cand, cost, depends_on);
3412 return cost != INFTY;
3415 exit = single_dom_exit (loop);
3416 if (exit)
3418 /* If there is just a single exit, we may use value of the candidate
3419 after we take it to determine the value of use. */
3420 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3421 last_stmt (exit->src));
3422 if (cost != INFTY)
3423 cost /= AVG_LOOP_NITER (loop);
3425 else
3427 /* Otherwise we just need to compute the iv. */
3428 cost = get_computation_cost (data, use, cand, false, &depends_on);
3431 set_use_iv_cost (data, use, cand, cost, depends_on);
3433 return cost != INFTY;
3436 /* Determines cost of basing replacement of USE on CAND. Returns false
3437 if USE cannot be based on CAND. */
3439 static bool
3440 determine_use_iv_cost (struct ivopts_data *data,
3441 struct iv_use *use, struct iv_cand *cand)
3443 switch (use->type)
3445 case USE_NONLINEAR_EXPR:
3446 return determine_use_iv_cost_generic (data, use, cand);
3448 case USE_OUTER:
3449 return determine_use_iv_cost_outer (data, use, cand);
3451 case USE_ADDRESS:
3452 return determine_use_iv_cost_address (data, use, cand);
3454 case USE_COMPARE:
3455 return determine_use_iv_cost_condition (data, use, cand);
3457 default:
3458 gcc_unreachable ();
3462 /* Determines costs of basing the use of the iv on an iv candidate. */
3464 static void
3465 determine_use_iv_costs (struct ivopts_data *data)
3467 unsigned i, j;
3468 struct iv_use *use;
3469 struct iv_cand *cand;
3470 bitmap to_clear = BITMAP_XMALLOC ();
3472 alloc_use_cost_map (data);
3474 for (i = 0; i < n_iv_uses (data); i++)
3476 use = iv_use (data, i);
3478 if (data->consider_all_candidates)
3480 for (j = 0; j < n_iv_cands (data); j++)
3482 cand = iv_cand (data, j);
3483 determine_use_iv_cost (data, use, cand);
3486 else
3488 bitmap_iterator bi;
3490 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3492 cand = iv_cand (data, j);
3493 if (!determine_use_iv_cost (data, use, cand))
3494 bitmap_set_bit (to_clear, j);
3497 /* Remove the candidates for that the cost is infinite from
3498 the list of related candidates. */
3499 bitmap_and_compl_into (use->related_cands, to_clear);
3500 bitmap_clear (to_clear);
3504 BITMAP_XFREE (to_clear);
3506 if (dump_file && (dump_flags & TDF_DETAILS))
3508 fprintf (dump_file, "Use-candidate costs:\n");
3510 for (i = 0; i < n_iv_uses (data); i++)
3512 use = iv_use (data, i);
3514 fprintf (dump_file, "Use %d:\n", i);
3515 fprintf (dump_file, " cand\tcost\tdepends on\n");
3516 for (j = 0; j < use->n_map_members; j++)
3518 if (!use->cost_map[j].cand
3519 || use->cost_map[j].cost == INFTY)
3520 continue;
3522 fprintf (dump_file, " %d\t%d\t",
3523 use->cost_map[j].cand->id,
3524 use->cost_map[j].cost);
3525 if (use->cost_map[j].depends_on)
3526 bitmap_print (dump_file,
3527 use->cost_map[j].depends_on, "","");
3528 fprintf (dump_file, "\n");
3531 fprintf (dump_file, "\n");
3533 fprintf (dump_file, "\n");
3537 /* Determines cost of the candidate CAND. */
3539 static void
3540 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3542 unsigned cost_base, cost_step;
3543 tree base;
3545 if (!cand->iv)
3547 cand->cost = 0;
3548 return;
3551 /* There are two costs associated with the candidate -- its increment
3552 and its initialization. The second is almost negligible for any loop
3553 that rolls enough, so we take it just very little into account. */
3555 base = cand->iv->base;
3556 cost_base = force_var_cost (data, base, NULL);
3557 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3559 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3561 /* Prefer the original iv unless we may gain something by replacing it. */
3562 if (cand->pos == IP_ORIGINAL)
3563 cand->cost--;
3565 /* Prefer not to insert statements into latch unless there are some
3566 already (so that we do not create unnecessary jumps). */
3567 if (cand->pos == IP_END
3568 && empty_block_p (ip_end_pos (data->current_loop)))
3569 cand->cost++;
3572 /* Determines costs of computation of the candidates. */
3574 static void
3575 determine_iv_costs (struct ivopts_data *data)
3577 unsigned i;
3579 if (dump_file && (dump_flags & TDF_DETAILS))
3581 fprintf (dump_file, "Candidate costs:\n");
3582 fprintf (dump_file, " cand\tcost\n");
3585 for (i = 0; i < n_iv_cands (data); i++)
3587 struct iv_cand *cand = iv_cand (data, i);
3589 determine_iv_cost (data, cand);
3591 if (dump_file && (dump_flags & TDF_DETAILS))
3592 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3595 if (dump_file && (dump_flags & TDF_DETAILS))
3596 fprintf (dump_file, "\n");
3599 /* Calculates cost for having SIZE induction variables. */
3601 static unsigned
3602 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3604 return global_cost_for_size (size,
3605 loop_data (data->current_loop)->regs_used,
3606 n_iv_uses (data));
3609 /* For each size of the induction variable set determine the penalty. */
3611 static void
3612 determine_set_costs (struct ivopts_data *data)
3614 unsigned j, n;
3615 tree phi, op;
3616 struct loop *loop = data->current_loop;
3617 bitmap_iterator bi;
3619 /* We use the following model (definitely improvable, especially the
3620 cost function -- TODO):
3622 We estimate the number of registers available (using MD data), name it A.
3624 We estimate the number of registers used by the loop, name it U. This
3625 number is obtained as the number of loop phi nodes (not counting virtual
3626 registers and bivs) + the number of variables from outside of the loop.
3628 We set a reserve R (free regs that are used for temporary computations,
3629 etc.). For now the reserve is a constant 3.
3631 Let I be the number of induction variables.
3633 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3634 make a lot of ivs without a reason).
3635 -- if A - R < U + I <= A, the cost is I * PRES_COST
3636 -- if U + I > A, the cost is I * PRES_COST and
3637 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3639 if (dump_file && (dump_flags & TDF_DETAILS))
3641 fprintf (dump_file, "Global costs:\n");
3642 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3643 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3644 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3645 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3648 n = 0;
3649 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3651 op = PHI_RESULT (phi);
3653 if (!is_gimple_reg (op))
3654 continue;
3656 if (get_iv (data, op))
3657 continue;
3659 n++;
3662 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3664 struct version_info *info = ver_info (data, j);
3666 if (info->inv_id && info->has_nonlin_use)
3667 n++;
3670 loop_data (loop)->regs_used = n;
3671 if (dump_file && (dump_flags & TDF_DETAILS))
3672 fprintf (dump_file, " regs_used %d\n", n);
3674 if (dump_file && (dump_flags & TDF_DETAILS))
3676 fprintf (dump_file, " cost for size:\n");
3677 fprintf (dump_file, " ivs\tcost\n");
3678 for (j = 0; j <= 2 * target_avail_regs; j++)
3679 fprintf (dump_file, " %d\t%d\n", j,
3680 ivopts_global_cost_for_size (data, j));
3681 fprintf (dump_file, "\n");
3685 /* Returns true if A is a cheaper cost pair than B. */
3687 static bool
3688 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3690 if (!a)
3691 return false;
3693 if (!b)
3694 return true;
3696 if (a->cost < b->cost)
3697 return true;
3699 if (a->cost > b->cost)
3700 return false;
3702 /* In case the costs are the same, prefer the cheaper candidate. */
3703 if (a->cand->cost < b->cand->cost)
3704 return true;
3706 return false;
3709 /* Computes the cost field of IVS structure. */
3711 static void
3712 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
3714 unsigned cost = 0;
3716 cost += ivs->cand_use_cost;
3717 cost += ivs->cand_cost;
3718 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
3720 ivs->cost = cost;
3723 /* Set USE not to be expressed by any candidate in IVS. */
3725 static void
3726 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
3727 struct iv_use *use)
3729 unsigned uid = use->id, cid, iid;
3730 bitmap deps;
3731 struct cost_pair *cp;
3732 bitmap_iterator bi;
3734 cp = ivs->cand_for_use[uid];
3735 if (!cp)
3736 return;
3737 cid = cp->cand->id;
3739 ivs->bad_uses++;
3740 ivs->cand_for_use[uid] = NULL;
3741 ivs->n_cand_uses[cid]--;
3743 if (ivs->n_cand_uses[cid] == 0)
3745 bitmap_clear_bit (ivs->cands, cid);
3746 /* Do not count the pseudocandidates. */
3747 if (cp->cand->iv)
3748 ivs->n_regs--;
3749 ivs->n_cands--;
3750 ivs->cand_cost -= cp->cand->cost;
3753 ivs->cand_use_cost -= cp->cost;
3755 deps = cp->depends_on;
3757 if (deps)
3759 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3761 ivs->n_invariant_uses[iid]--;
3762 if (ivs->n_invariant_uses[iid] == 0)
3763 ivs->n_regs--;
3767 iv_ca_recount_cost (data, ivs);
3770 /* Set cost pair for USE in set IVS to CP. */
3772 static void
3773 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
3774 struct iv_use *use, struct cost_pair *cp)
3776 unsigned uid = use->id, cid, iid;
3777 bitmap deps;
3778 bitmap_iterator bi;
3780 if (ivs->cand_for_use[uid] == cp)
3781 return;
3783 if (ivs->cand_for_use[uid])
3784 iv_ca_set_no_cp (data, ivs, use);
3786 if (cp)
3788 cid = cp->cand->id;
3790 ivs->bad_uses--;
3791 ivs->cand_for_use[uid] = cp;
3792 ivs->n_cand_uses[cid]++;
3793 if (ivs->n_cand_uses[cid] == 1)
3795 bitmap_set_bit (ivs->cands, cid);
3796 /* Do not count the pseudocandidates. */
3797 if (cp->cand->iv)
3798 ivs->n_regs++;
3799 ivs->n_cands++;
3800 ivs->cand_cost += cp->cand->cost;
3803 ivs->cand_use_cost += cp->cost;
3805 deps = cp->depends_on;
3807 if (deps)
3809 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3811 ivs->n_invariant_uses[iid]++;
3812 if (ivs->n_invariant_uses[iid] == 1)
3813 ivs->n_regs++;
3817 iv_ca_recount_cost (data, ivs);
3821 /* Extend set IVS by expressing USE by some of the candidates in it
3822 if possible. */
3824 static void
3825 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
3826 struct iv_use *use)
3828 struct cost_pair *best_cp = NULL, *cp;
3829 bitmap_iterator bi;
3830 unsigned i;
3832 gcc_assert (ivs->upto >= use->id);
3834 if (ivs->upto == use->id)
3836 ivs->upto++;
3837 ivs->bad_uses++;
3840 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
3842 cp = get_use_iv_cost (data, use, iv_cand (data, i));
3844 if (cheaper_cost_pair (cp, best_cp))
3845 best_cp = cp;
3848 iv_ca_set_cp (data, ivs, use, best_cp);
3851 /* Get cost for assignment IVS. */
3853 static unsigned
3854 iv_ca_cost (struct iv_ca *ivs)
3856 return (ivs->bad_uses ? INFTY : ivs->cost);
3859 /* Returns true if all dependences of CP are among invariants in IVS. */
3861 static bool
3862 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
3864 unsigned i;
3865 bitmap_iterator bi;
3867 if (!cp->depends_on)
3868 return true;
3870 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
3872 if (ivs->n_invariant_uses[i] == 0)
3873 return false;
3876 return true;
3879 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
3880 it before NEXT_CHANGE. */
3882 static struct iv_ca_delta *
3883 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
3884 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
3886 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
3888 change->use = use;
3889 change->old_cp = old_cp;
3890 change->new_cp = new_cp;
3891 change->next_change = next_change;
3893 return change;
3896 /* Joins two lists of changes L1 and L2. Destructive -- old lists
3897 are rewritten. */
3899 static struct iv_ca_delta *
3900 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
3902 struct iv_ca_delta *last;
3904 if (!l2)
3905 return l1;
3907 if (!l1)
3908 return l2;
3910 for (last = l1; last->next_change; last = last->next_change)
3911 continue;
3912 last->next_change = l2;
3914 return l1;
3917 /* Returns candidate by that USE is expressed in IVS. */
3919 static struct cost_pair *
3920 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
3922 return ivs->cand_for_use[use->id];
3925 /* Reverse the list of changes DELTA, forming the inverse to it. */
3927 static struct iv_ca_delta *
3928 iv_ca_delta_reverse (struct iv_ca_delta *delta)
3930 struct iv_ca_delta *act, *next, *prev = NULL;
3931 struct cost_pair *tmp;
3933 for (act = delta; act; act = next)
3935 next = act->next_change;
3936 act->next_change = prev;
3937 prev = act;
3939 tmp = act->old_cp;
3940 act->old_cp = act->new_cp;
3941 act->new_cp = tmp;
3944 return prev;
3947 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
3948 reverted instead. */
3950 static void
3951 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
3952 struct iv_ca_delta *delta, bool forward)
3954 struct cost_pair *from, *to;
3955 struct iv_ca_delta *act;
3957 if (!forward)
3958 delta = iv_ca_delta_reverse (delta);
3960 for (act = delta; act; act = act->next_change)
3962 from = act->old_cp;
3963 to = act->new_cp;
3964 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
3965 iv_ca_set_cp (data, ivs, act->use, to);
3968 if (!forward)
3969 iv_ca_delta_reverse (delta);
3972 /* Returns true if CAND is used in IVS. */
3974 static bool
3975 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
3977 return ivs->n_cand_uses[cand->id] > 0;
3980 /* Returns number of induction variable candidates in the set IVS. */
3982 static unsigned
3983 iv_ca_n_cands (struct iv_ca *ivs)
3985 return ivs->n_cands;
3988 /* Free the list of changes DELTA. */
3990 static void
3991 iv_ca_delta_free (struct iv_ca_delta **delta)
3993 struct iv_ca_delta *act, *next;
3995 for (act = *delta; act; act = next)
3997 next = act->next_change;
3998 free (act);
4001 *delta = NULL;
4004 /* Allocates new iv candidates assignment. */
4006 static struct iv_ca *
4007 iv_ca_new (struct ivopts_data *data)
4009 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4011 nw->upto = 0;
4012 nw->bad_uses = 0;
4013 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4014 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4015 nw->cands = BITMAP_XMALLOC ();
4016 nw->n_cands = 0;
4017 nw->n_regs = 0;
4018 nw->cand_use_cost = 0;
4019 nw->cand_cost = 0;
4020 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4021 nw->cost = 0;
4023 return nw;
4026 /* Free memory occupied by the set IVS. */
4028 static void
4029 iv_ca_free (struct iv_ca **ivs)
4031 free ((*ivs)->cand_for_use);
4032 free ((*ivs)->n_cand_uses);
4033 BITMAP_XFREE ((*ivs)->cands);
4034 free ((*ivs)->n_invariant_uses);
4035 free (*ivs);
4036 *ivs = NULL;
4039 /* Dumps IVS to FILE. */
4041 static void
4042 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4044 const char *pref = " invariants ";
4045 unsigned i;
4047 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4048 bitmap_print (file, ivs->cands, " candidates ","\n");
4050 for (i = 1; i <= data->max_inv_id; i++)
4051 if (ivs->n_invariant_uses[i])
4053 fprintf (file, "%s%d", pref, i);
4054 pref = ", ";
4056 fprintf (file, "\n");
4059 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4060 new set, and store differences in DELTA. Number of induction variables
4061 in the new set is stored to N_IVS. */
4063 static unsigned
4064 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4065 struct iv_cand *cand, struct iv_ca_delta **delta,
4066 unsigned *n_ivs)
4068 unsigned i, cost;
4069 struct iv_use *use;
4070 struct cost_pair *old_cp, *new_cp;
4072 *delta = NULL;
4073 for (i = 0; i < ivs->upto; i++)
4075 use = iv_use (data, i);
4076 old_cp = iv_ca_cand_for_use (ivs, use);
4078 if (old_cp
4079 && old_cp->cand == cand)
4080 continue;
4082 new_cp = get_use_iv_cost (data, use, cand);
4083 if (!new_cp)
4084 continue;
4086 if (!iv_ca_has_deps (ivs, new_cp))
4087 continue;
4089 if (!cheaper_cost_pair (new_cp, old_cp))
4090 continue;
4092 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4095 iv_ca_delta_commit (data, ivs, *delta, true);
4096 cost = iv_ca_cost (ivs);
4097 if (n_ivs)
4098 *n_ivs = iv_ca_n_cands (ivs);
4099 iv_ca_delta_commit (data, ivs, *delta, false);
4101 return cost;
4104 /* Try narrowing set IVS by removing CAND. Return the cost of
4105 the new set and store the differences in DELTA. */
4107 static unsigned
4108 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4109 struct iv_cand *cand, struct iv_ca_delta **delta)
4111 unsigned i, ci;
4112 struct iv_use *use;
4113 struct cost_pair *old_cp, *new_cp, *cp;
4114 bitmap_iterator bi;
4115 struct iv_cand *cnd;
4116 unsigned cost;
4118 *delta = NULL;
4119 for (i = 0; i < n_iv_uses (data); i++)
4121 use = iv_use (data, i);
4123 old_cp = iv_ca_cand_for_use (ivs, use);
4124 if (old_cp->cand != cand)
4125 continue;
4127 new_cp = NULL;
4129 if (data->consider_all_candidates)
4131 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4133 if (ci == cand->id)
4134 continue;
4136 cnd = iv_cand (data, ci);
4138 cp = get_use_iv_cost (data, use, cnd);
4139 if (!cp)
4140 continue;
4141 if (!iv_ca_has_deps (ivs, cp))
4142 continue;
4144 if (!cheaper_cost_pair (cp, new_cp))
4145 continue;
4147 new_cp = cp;
4150 else
4152 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4154 if (ci == cand->id)
4155 continue;
4157 cnd = iv_cand (data, ci);
4159 cp = get_use_iv_cost (data, use, cnd);
4160 if (!cp)
4161 continue;
4162 if (!iv_ca_has_deps (ivs, cp))
4163 continue;
4165 if (!cheaper_cost_pair (cp, new_cp))
4166 continue;
4168 new_cp = cp;
4172 if (!new_cp)
4174 iv_ca_delta_free (delta);
4175 return INFTY;
4178 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4181 iv_ca_delta_commit (data, ivs, *delta, true);
4182 cost = iv_ca_cost (ivs);
4183 iv_ca_delta_commit (data, ivs, *delta, false);
4185 return cost;
4188 /* Try optimizing the set of candidates IVS by removing candidates different
4189 from to EXCEPT_CAND from it. Return cost of the new set, and store
4190 differences in DELTA. */
4192 static unsigned
4193 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4194 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4196 bitmap_iterator bi;
4197 struct iv_ca_delta *act_delta, *best_delta;
4198 unsigned i, best_cost, acost;
4199 struct iv_cand *cand;
4201 best_delta = NULL;
4202 best_cost = iv_ca_cost (ivs);
4204 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4206 cand = iv_cand (data, i);
4208 if (cand == except_cand)
4209 continue;
4211 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4213 if (acost < best_cost)
4215 best_cost = acost;
4216 iv_ca_delta_free (&best_delta);
4217 best_delta = act_delta;
4219 else
4220 iv_ca_delta_free (&act_delta);
4223 if (!best_delta)
4225 *delta = NULL;
4226 return best_cost;
4229 /* Recurse to possibly remove other unnecessary ivs. */
4230 iv_ca_delta_commit (data, ivs, best_delta, true);
4231 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4232 iv_ca_delta_commit (data, ivs, best_delta, false);
4233 *delta = iv_ca_delta_join (best_delta, *delta);
4234 return best_cost;
4237 /* Tries to extend the sets IVS in the best possible way in order
4238 to express the USE. */
4240 static bool
4241 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4242 struct iv_use *use)
4244 unsigned best_cost, act_cost;
4245 unsigned i;
4246 bitmap_iterator bi;
4247 struct iv_cand *cand;
4248 struct iv_ca_delta *best_delta = NULL, *act_delta;
4249 struct cost_pair *cp;
4251 iv_ca_add_use (data, ivs, use);
4252 best_cost = iv_ca_cost (ivs);
4254 cp = iv_ca_cand_for_use (ivs, use);
4255 if (cp)
4257 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4258 iv_ca_set_no_cp (data, ivs, use);
4261 /* First try important candidates. Only if it fails, try the specific ones.
4262 Rationale -- in loops with many variables the best choice often is to use
4263 just one generic biv. If we added here many ivs specific to the uses,
4264 the optimization algorithm later would be likely to get stuck in a local
4265 minimum, thus causing us to create too many ivs. The approach from
4266 few ivs to more seems more likely to be successful -- starting from few
4267 ivs, replacing an expensive use by a specific iv should always be a
4268 win. */
4269 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4271 cand = iv_cand (data, i);
4273 if (iv_ca_cand_used_p (ivs, cand))
4274 continue;
4276 cp = get_use_iv_cost (data, use, cand);
4277 if (!cp)
4278 continue;
4280 iv_ca_set_cp (data, ivs, use, cp);
4281 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4282 iv_ca_set_no_cp (data, ivs, use);
4283 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4285 if (act_cost < best_cost)
4287 best_cost = act_cost;
4289 iv_ca_delta_free (&best_delta);
4290 best_delta = act_delta;
4292 else
4293 iv_ca_delta_free (&act_delta);
4296 if (best_cost == INFTY)
4298 for (i = 0; i < use->n_map_members; i++)
4300 cp = use->cost_map + i;
4301 cand = cp->cand;
4302 if (!cand)
4303 continue;
4305 /* Already tried this. */
4306 if (cand->important)
4307 continue;
4309 if (iv_ca_cand_used_p (ivs, cand))
4310 continue;
4312 act_delta = NULL;
4313 iv_ca_set_cp (data, ivs, use, cp);
4314 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4315 iv_ca_set_no_cp (data, ivs, use);
4316 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4317 cp, act_delta);
4319 if (act_cost < best_cost)
4321 best_cost = act_cost;
4323 if (best_delta)
4324 iv_ca_delta_free (&best_delta);
4325 best_delta = act_delta;
4327 else
4328 iv_ca_delta_free (&act_delta);
4332 iv_ca_delta_commit (data, ivs, best_delta, true);
4333 iv_ca_delta_free (&best_delta);
4335 return (best_cost != INFTY);
4338 /* Finds an initial assignment of candidates to uses. */
4340 static struct iv_ca *
4341 get_initial_solution (struct ivopts_data *data)
4343 struct iv_ca *ivs = iv_ca_new (data);
4344 unsigned i;
4346 for (i = 0; i < n_iv_uses (data); i++)
4347 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4349 iv_ca_free (&ivs);
4350 return NULL;
4353 return ivs;
4356 /* Tries to improve set of induction variables IVS. */
4358 static bool
4359 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4361 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4362 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4363 struct iv_cand *cand;
4365 /* Try extending the set of induction variables by one. */
4366 for (i = 0; i < n_iv_cands (data); i++)
4368 cand = iv_cand (data, i);
4370 if (iv_ca_cand_used_p (ivs, cand))
4371 continue;
4373 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4374 if (!act_delta)
4375 continue;
4377 /* If we successfully added the candidate and the set is small enough,
4378 try optimizing it by removing other candidates. */
4379 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4381 iv_ca_delta_commit (data, ivs, act_delta, true);
4382 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4383 iv_ca_delta_commit (data, ivs, act_delta, false);
4384 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4387 if (acost < best_cost)
4389 best_cost = acost;
4390 iv_ca_delta_free (&best_delta);
4391 best_delta = act_delta;
4393 else
4394 iv_ca_delta_free (&act_delta);
4397 if (!best_delta)
4399 /* Try removing the candidates from the set instead. */
4400 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4402 /* Nothing more we can do. */
4403 if (!best_delta)
4404 return false;
4407 iv_ca_delta_commit (data, ivs, best_delta, true);
4408 gcc_assert (best_cost == iv_ca_cost (ivs));
4409 iv_ca_delta_free (&best_delta);
4410 return true;
4413 /* Attempts to find the optimal set of induction variables. We do simple
4414 greedy heuristic -- we try to replace at most one candidate in the selected
4415 solution and remove the unused ivs while this improves the cost. */
4417 static struct iv_ca *
4418 find_optimal_iv_set (struct ivopts_data *data)
4420 unsigned i;
4421 struct iv_ca *set;
4422 struct iv_use *use;
4424 /* Get the initial solution. */
4425 set = get_initial_solution (data);
4426 if (!set)
4428 if (dump_file && (dump_flags & TDF_DETAILS))
4429 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4430 return NULL;
4433 if (dump_file && (dump_flags & TDF_DETAILS))
4435 fprintf (dump_file, "Initial set of candidates:\n");
4436 iv_ca_dump (data, dump_file, set);
4439 while (try_improve_iv_set (data, set))
4441 if (dump_file && (dump_flags & TDF_DETAILS))
4443 fprintf (dump_file, "Improved to:\n");
4444 iv_ca_dump (data, dump_file, set);
4448 if (dump_file && (dump_flags & TDF_DETAILS))
4449 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
4451 for (i = 0; i < n_iv_uses (data); i++)
4453 use = iv_use (data, i);
4454 use->selected = iv_ca_cand_for_use (set, use)->cand;
4457 return set;
4460 /* Creates a new induction variable corresponding to CAND. */
4462 static void
4463 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4465 block_stmt_iterator incr_pos;
4466 tree base;
4467 bool after = false;
4469 if (!cand->iv)
4470 return;
4472 switch (cand->pos)
4474 case IP_NORMAL:
4475 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4476 break;
4478 case IP_END:
4479 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4480 after = true;
4481 break;
4483 case IP_ORIGINAL:
4484 /* Mark that the iv is preserved. */
4485 name_info (data, cand->var_before)->preserve_biv = true;
4486 name_info (data, cand->var_after)->preserve_biv = true;
4488 /* Rewrite the increment so that it uses var_before directly. */
4489 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4491 return;
4494 gimple_add_tmp_var (cand->var_before);
4495 add_referenced_tmp_var (cand->var_before);
4497 base = unshare_expr (cand->iv->base);
4499 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
4500 &incr_pos, after, &cand->var_before, &cand->var_after);
4503 /* Creates new induction variables described in SET. */
4505 static void
4506 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4508 unsigned i;
4509 struct iv_cand *cand;
4510 bitmap_iterator bi;
4512 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4514 cand = iv_cand (data, i);
4515 create_new_iv (data, cand);
4519 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4520 is true, remove also the ssa name defined by the statement. */
4522 static void
4523 remove_statement (tree stmt, bool including_defined_name)
4525 if (TREE_CODE (stmt) == PHI_NODE)
4527 if (!including_defined_name)
4529 /* Prevent the ssa name defined by the statement from being removed. */
4530 SET_PHI_RESULT (stmt, NULL);
4532 remove_phi_node (stmt, NULL_TREE, bb_for_stmt (stmt));
4534 else
4536 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4538 bsi_remove (&bsi);
4542 /* Rewrites USE (definition of iv used in a nonlinear expression)
4543 using candidate CAND. */
4545 static void
4546 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4547 struct iv_use *use, struct iv_cand *cand)
4549 tree comp = unshare_expr (get_computation (data->current_loop,
4550 use, cand));
4551 tree op, stmts, tgt, ass;
4552 block_stmt_iterator bsi, pbsi;
4554 switch (TREE_CODE (use->stmt))
4556 case PHI_NODE:
4557 tgt = PHI_RESULT (use->stmt);
4559 /* If we should keep the biv, do not replace it. */
4560 if (name_info (data, tgt)->preserve_biv)
4561 return;
4563 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4564 while (!bsi_end_p (pbsi)
4565 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4567 bsi = pbsi;
4568 bsi_next (&pbsi);
4570 break;
4572 case MODIFY_EXPR:
4573 tgt = TREE_OPERAND (use->stmt, 0);
4574 bsi = bsi_for_stmt (use->stmt);
4575 break;
4577 default:
4578 gcc_unreachable ();
4581 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4583 if (TREE_CODE (use->stmt) == PHI_NODE)
4585 if (stmts)
4586 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4587 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
4588 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4589 remove_statement (use->stmt, false);
4590 SSA_NAME_DEF_STMT (tgt) = ass;
4592 else
4594 if (stmts)
4595 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4596 TREE_OPERAND (use->stmt, 1) = op;
4600 /* Replaces ssa name in index IDX by its basic variable. Callback for
4601 for_each_index. */
4603 static bool
4604 idx_remove_ssa_names (tree base, tree *idx,
4605 void *data ATTRIBUTE_UNUSED)
4607 tree *op;
4609 if (TREE_CODE (*idx) == SSA_NAME)
4610 *idx = SSA_NAME_VAR (*idx);
4612 if (TREE_CODE (base) == ARRAY_REF)
4614 op = &TREE_OPERAND (base, 2);
4615 if (*op
4616 && TREE_CODE (*op) == SSA_NAME)
4617 *op = SSA_NAME_VAR (*op);
4618 op = &TREE_OPERAND (base, 3);
4619 if (*op
4620 && TREE_CODE (*op) == SSA_NAME)
4621 *op = SSA_NAME_VAR (*op);
4624 return true;
4627 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4629 static tree
4630 unshare_and_remove_ssa_names (tree ref)
4632 ref = unshare_expr (ref);
4633 for_each_index (&ref, idx_remove_ssa_names, NULL);
4635 return ref;
4638 /* Rewrites base of memory access OP with expression WITH in statement
4639 pointed to by BSI. */
4641 static void
4642 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
4644 tree bvar, var, new_var, new_name, copy, name;
4645 tree orig;
4647 var = bvar = get_base_address (*op);
4649 if (!var || TREE_CODE (with) != SSA_NAME)
4650 goto do_rewrite;
4652 gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
4653 gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
4654 if (TREE_CODE (var) == INDIRECT_REF)
4655 var = TREE_OPERAND (var, 0);
4656 if (TREE_CODE (var) == SSA_NAME)
4658 name = var;
4659 var = SSA_NAME_VAR (var);
4661 else if (DECL_P (var))
4662 name = NULL_TREE;
4663 else
4664 goto do_rewrite;
4666 if (var_ann (var)->type_mem_tag)
4667 var = var_ann (var)->type_mem_tag;
4669 /* We need to add a memory tag for the variable. But we do not want
4670 to add it to the temporary used for the computations, since this leads
4671 to problems in redundancy elimination when there are common parts
4672 in two computations referring to the different arrays. So we copy
4673 the variable to a new temporary. */
4674 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
4675 if (name)
4676 new_name = duplicate_ssa_name (name, copy);
4677 else
4679 new_var = create_tmp_var (TREE_TYPE (with), "ruatmp");
4680 add_referenced_tmp_var (new_var);
4681 var_ann (new_var)->type_mem_tag = var;
4682 new_name = make_ssa_name (new_var, copy);
4684 TREE_OPERAND (copy, 0) = new_name;
4685 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
4686 with = new_name;
4688 do_rewrite:
4690 orig = NULL_TREE;
4691 gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
4692 gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
4694 if (TREE_CODE (*op) == INDIRECT_REF)
4695 orig = REF_ORIGINAL (*op);
4696 if (!orig)
4697 orig = unshare_and_remove_ssa_names (*op);
4699 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
4701 /* Record the original reference, for purposes of alias analysis. */
4702 REF_ORIGINAL (*op) = orig;
4705 /* Rewrites USE (address that is an iv) using candidate CAND. */
4707 static void
4708 rewrite_use_address (struct ivopts_data *data,
4709 struct iv_use *use, struct iv_cand *cand)
4711 tree comp = unshare_expr (get_computation (data->current_loop,
4712 use, cand));
4713 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4714 tree stmts;
4715 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
4717 if (stmts)
4718 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4720 rewrite_address_base (&bsi, use->op_p, op);
4723 /* Rewrites USE (the condition such that one of the arguments is an iv) using
4724 candidate CAND. */
4726 static void
4727 rewrite_use_compare (struct ivopts_data *data,
4728 struct iv_use *use, struct iv_cand *cand)
4730 tree comp;
4731 tree *op_p, cond, op, stmts, bound;
4732 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4733 enum tree_code compare;
4735 if (may_eliminate_iv (data->current_loop,
4736 use, cand, &compare, &bound))
4738 op = force_gimple_operand (unshare_expr (bound), &stmts,
4739 true, NULL_TREE);
4741 if (stmts)
4742 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4744 *use->op_p = build2 (compare, boolean_type_node,
4745 var_at_stmt (data->current_loop,
4746 cand, use->stmt), op);
4747 modify_stmt (use->stmt);
4748 return;
4751 /* The induction variable elimination failed; just express the original
4752 giv. */
4753 comp = unshare_expr (get_computation (data->current_loop, use, cand));
4755 cond = *use->op_p;
4756 op_p = &TREE_OPERAND (cond, 0);
4757 if (TREE_CODE (*op_p) != SSA_NAME
4758 || zero_p (get_iv (data, *op_p)->step))
4759 op_p = &TREE_OPERAND (cond, 1);
4761 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
4762 if (stmts)
4763 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4765 *op_p = op;
4768 /* Ensure that operand *OP_P may be used at the end of EXIT without
4769 violating loop closed ssa form. */
4771 static void
4772 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
4774 basic_block def_bb;
4775 struct loop *def_loop;
4776 tree phi, use;
4778 use = USE_FROM_PTR (op_p);
4779 if (TREE_CODE (use) != SSA_NAME)
4780 return;
4782 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
4783 if (!def_bb)
4784 return;
4786 def_loop = def_bb->loop_father;
4787 if (flow_bb_inside_loop_p (def_loop, exit->dest))
4788 return;
4790 /* Try finding a phi node that copies the value out of the loop. */
4791 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
4792 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
4793 break;
4795 if (!phi)
4797 /* Create such a phi node. */
4798 tree new_name = duplicate_ssa_name (use, NULL);
4800 phi = create_phi_node (new_name, exit->dest);
4801 SSA_NAME_DEF_STMT (new_name) = phi;
4802 add_phi_arg (phi, use, exit);
4805 SET_USE (op_p, PHI_RESULT (phi));
4808 /* Ensure that operands of STMT may be used at the end of EXIT without
4809 violating loop closed ssa form. */
4811 static void
4812 protect_loop_closed_ssa_form (edge exit, tree stmt)
4814 use_optype uses;
4815 vuse_optype vuses;
4816 v_may_def_optype v_may_defs;
4817 unsigned i;
4819 get_stmt_operands (stmt);
4821 uses = STMT_USE_OPS (stmt);
4822 for (i = 0; i < NUM_USES (uses); i++)
4823 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4825 vuses = STMT_VUSE_OPS (stmt);
4826 for (i = 0; i < NUM_VUSES (vuses); i++)
4827 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
4829 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4830 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
4831 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
4834 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
4835 so that they are emitted on the correct place, and so that the loop closed
4836 ssa form is preserved. */
4838 static void
4839 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
4841 tree_stmt_iterator tsi;
4842 block_stmt_iterator bsi;
4843 tree phi, stmt, def, next;
4845 if (EDGE_COUNT (exit->dest->preds) > 1)
4846 split_loop_exit_edge (exit);
4848 if (TREE_CODE (stmts) == STATEMENT_LIST)
4850 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
4851 protect_loop_closed_ssa_form (exit, tsi_stmt (tsi));
4853 else
4854 protect_loop_closed_ssa_form (exit, stmts);
4856 /* Ensure there is label in exit->dest, so that we can
4857 insert after it. */
4858 tree_block_label (exit->dest);
4859 bsi = bsi_after_labels (exit->dest);
4860 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4862 if (!op)
4863 return;
4865 for (phi = phi_nodes (exit->dest); phi; phi = next)
4867 next = PHI_CHAIN (phi);
4869 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
4871 def = PHI_RESULT (phi);
4872 remove_statement (phi, false);
4873 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
4874 def, op);
4875 SSA_NAME_DEF_STMT (def) = stmt;
4876 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
4881 /* Rewrites the final value of USE (that is only needed outside of the loop)
4882 using candidate CAND. */
4884 static void
4885 rewrite_use_outer (struct ivopts_data *data,
4886 struct iv_use *use, struct iv_cand *cand)
4888 edge exit;
4889 tree value, op, stmts, tgt;
4890 tree phi;
4892 switch (TREE_CODE (use->stmt))
4894 case PHI_NODE:
4895 tgt = PHI_RESULT (use->stmt);
4896 break;
4897 case MODIFY_EXPR:
4898 tgt = TREE_OPERAND (use->stmt, 0);
4899 break;
4900 default:
4901 gcc_unreachable ();
4904 exit = single_dom_exit (data->current_loop);
4906 if (exit)
4908 if (!cand->iv)
4910 bool ok = may_replace_final_value (data->current_loop, use, &value);
4911 gcc_assert (ok);
4913 else
4914 value = get_computation_at (data->current_loop,
4915 use, cand, last_stmt (exit->src));
4917 value = unshare_expr (value);
4918 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
4920 /* If we will preserve the iv anyway and we would need to perform
4921 some computation to replace the final value, do nothing. */
4922 if (stmts && name_info (data, tgt)->preserve_biv)
4923 return;
4925 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
4927 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
4929 if (USE_FROM_PTR (use_p) == tgt)
4930 SET_USE (use_p, op);
4933 if (stmts)
4934 compute_phi_arg_on_exit (exit, stmts, op);
4936 /* Enable removal of the statement. We cannot remove it directly,
4937 since we may still need the aliasing information attached to the
4938 ssa name defined by it. */
4939 name_info (data, tgt)->iv->have_use_for = false;
4940 return;
4943 /* If the variable is going to be preserved anyway, there is nothing to
4944 do. */
4945 if (name_info (data, tgt)->preserve_biv)
4946 return;
4948 /* Otherwise we just need to compute the iv. */
4949 rewrite_use_nonlinear_expr (data, use, cand);
4952 /* Rewrites USE using candidate CAND. */
4954 static void
4955 rewrite_use (struct ivopts_data *data,
4956 struct iv_use *use, struct iv_cand *cand)
4958 switch (use->type)
4960 case USE_NONLINEAR_EXPR:
4961 rewrite_use_nonlinear_expr (data, use, cand);
4962 break;
4964 case USE_OUTER:
4965 rewrite_use_outer (data, use, cand);
4966 break;
4968 case USE_ADDRESS:
4969 rewrite_use_address (data, use, cand);
4970 break;
4972 case USE_COMPARE:
4973 rewrite_use_compare (data, use, cand);
4974 break;
4976 default:
4977 gcc_unreachable ();
4979 modify_stmt (use->stmt);
4982 /* Rewrite the uses using the selected induction variables. */
4984 static void
4985 rewrite_uses (struct ivopts_data *data)
4987 unsigned i;
4988 struct iv_cand *cand;
4989 struct iv_use *use;
4991 for (i = 0; i < n_iv_uses (data); i++)
4993 use = iv_use (data, i);
4994 cand = use->selected;
4995 gcc_assert (cand);
4997 rewrite_use (data, use, cand);
5001 /* Removes the ivs that are not used after rewriting. */
5003 static void
5004 remove_unused_ivs (struct ivopts_data *data)
5006 unsigned j;
5007 bitmap_iterator bi;
5009 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5011 struct version_info *info;
5013 info = ver_info (data, j);
5014 if (info->iv
5015 && !zero_p (info->iv->step)
5016 && !info->inv_id
5017 && !info->iv->have_use_for
5018 && !info->preserve_biv)
5019 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5023 /* Frees data allocated by the optimization of a single loop. */
5025 static void
5026 free_loop_data (struct ivopts_data *data)
5028 unsigned i, j;
5029 bitmap_iterator bi;
5031 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5033 struct version_info *info;
5035 info = ver_info (data, i);
5036 if (info->iv)
5037 free (info->iv);
5038 info->iv = NULL;
5039 info->has_nonlin_use = false;
5040 info->preserve_biv = false;
5041 info->inv_id = 0;
5043 bitmap_clear (data->relevant);
5044 bitmap_clear (data->important_candidates);
5046 for (i = 0; i < n_iv_uses (data); i++)
5048 struct iv_use *use = iv_use (data, i);
5050 free (use->iv);
5051 BITMAP_XFREE (use->related_cands);
5052 for (j = 0; j < use->n_map_members; j++)
5053 if (use->cost_map[j].depends_on)
5054 BITMAP_XFREE (use->cost_map[j].depends_on);
5055 free (use->cost_map);
5056 free (use);
5058 VARRAY_POP_ALL (data->iv_uses);
5060 for (i = 0; i < n_iv_cands (data); i++)
5062 struct iv_cand *cand = iv_cand (data, i);
5064 if (cand->iv)
5065 free (cand->iv);
5066 free (cand);
5068 VARRAY_POP_ALL (data->iv_candidates);
5070 if (data->version_info_size < num_ssa_names)
5072 data->version_info_size = 2 * num_ssa_names;
5073 free (data->version_info);
5074 data->version_info = xcalloc (data->version_info_size,
5075 sizeof (struct version_info));
5078 data->max_inv_id = 0;
5080 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
5082 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
5084 SET_DECL_RTL (obj, NULL_RTX);
5086 VARRAY_POP_ALL (decl_rtl_to_reset);
5089 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5090 loop tree. */
5092 static void
5093 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5095 unsigned i;
5097 for (i = 1; i < loops->num; i++)
5098 if (loops->parray[i])
5100 free (loops->parray[i]->aux);
5101 loops->parray[i]->aux = NULL;
5104 free_loop_data (data);
5105 free (data->version_info);
5106 BITMAP_XFREE (data->relevant);
5107 BITMAP_XFREE (data->important_candidates);
5109 VARRAY_FREE (decl_rtl_to_reset);
5110 VARRAY_FREE (data->iv_uses);
5111 VARRAY_FREE (data->iv_candidates);
5114 /* Optimizes the LOOP. Returns true if anything changed. */
5116 static bool
5117 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5119 bool changed = false;
5120 struct iv_ca *iv_ca;
5121 edge exit;
5123 data->current_loop = loop;
5125 if (dump_file && (dump_flags & TDF_DETAILS))
5127 fprintf (dump_file, "Processing loop %d\n", loop->num);
5129 exit = single_dom_exit (loop);
5130 if (exit)
5132 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5133 exit->src->index, exit->dest->index);
5134 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5135 fprintf (dump_file, "\n");
5138 fprintf (dump_file, "\n");
5141 /* For each ssa name determines whether it behaves as an induction variable
5142 in some loop. */
5143 if (!find_induction_variables (data))
5144 goto finish;
5146 /* Finds interesting uses (item 1). */
5147 find_interesting_uses (data);
5148 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5149 goto finish;
5151 /* Finds candidates for the induction variables (item 2). */
5152 find_iv_candidates (data);
5154 /* Calculates the costs (item 3, part 1). */
5155 determine_use_iv_costs (data);
5156 determine_iv_costs (data);
5157 determine_set_costs (data);
5159 /* Find the optimal set of induction variables (item 3, part 2). */
5160 iv_ca = find_optimal_iv_set (data);
5161 if (!iv_ca)
5162 goto finish;
5163 changed = true;
5165 /* Create the new induction variables (item 4, part 1). */
5166 create_new_ivs (data, iv_ca);
5167 iv_ca_free (&iv_ca);
5169 /* Rewrite the uses (item 4, part 2). */
5170 rewrite_uses (data);
5172 /* Remove the ivs that are unused after rewriting. */
5173 remove_unused_ivs (data);
5175 loop_commit_inserts ();
5177 /* We have changed the structure of induction variables; it might happen
5178 that definitions in the scev database refer to some of them that were
5179 eliminated. */
5180 scev_reset ();
5182 finish:
5183 free_loop_data (data);
5185 return changed;
5188 /* Main entry point. Optimizes induction variables in LOOPS. */
5190 void
5191 tree_ssa_iv_optimize (struct loops *loops)
5193 struct loop *loop;
5194 struct ivopts_data data;
5196 tree_ssa_iv_optimize_init (loops, &data);
5198 /* Optimize the loops starting with the innermost ones. */
5199 loop = loops->tree_root;
5200 while (loop->inner)
5201 loop = loop->inner;
5203 #ifdef ENABLE_CHECKING
5204 verify_loop_closed_ssa ();
5205 verify_stmts ();
5206 #endif
5208 /* Scan the loops, inner ones first. */
5209 while (loop != loops->tree_root)
5211 if (dump_file && (dump_flags & TDF_DETAILS))
5212 flow_loop_dump (loop, dump_file, NULL, 1);
5214 tree_ssa_iv_optimize_loop (&data, loop);
5216 if (loop->next)
5218 loop = loop->next;
5219 while (loop->inner)
5220 loop = loop->inner;
5222 else
5223 loop = loop->outer;
5226 #ifdef ENABLE_CHECKING
5227 verify_loop_closed_ssa ();
5228 verify_stmts ();
5229 #endif
5231 tree_ssa_iv_optimize_finalize (loops, &data);