PR target/19236
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blob73034127a7c9d60d86c54418f43c987ccbf0b3d7
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
92 /* The infinite cost. */
93 #define INFTY 10000000
95 /* The expected number of loop iterations. TODO -- use profiling instead of
96 this. */
97 #define AVG_LOOP_NITER(LOOP) 5
100 /* Representation of the induction variable. */
101 struct iv
103 tree base; /* Initial value of the iv. */
104 tree base_object; /* A memory object to that the induction variable points. */
105 tree step; /* Step of the iv (constant only). */
106 tree ssa_name; /* The ssa name with the value. */
107 bool biv_p; /* Is it a biv? */
108 bool have_use_for; /* Do we already have a use for it? */
109 unsigned use_id; /* The identifier in the use if it is the case. */
112 /* Per-ssa version information (induction variable descriptions, etc.). */
113 struct version_info
115 tree name; /* The ssa name. */
116 struct iv *iv; /* Induction variable description. */
117 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
118 an expression that is not an induction variable. */
119 unsigned inv_id; /* Id of an invariant. */
120 bool preserve_biv; /* For the original biv, whether to preserve it. */
123 /* Information attached to loop. */
124 struct loop_data
126 struct tree_niter_desc niter;
127 /* Number of iterations. */
129 unsigned regs_used; /* Number of registers used. */
132 /* Types of uses. */
133 enum use_type
135 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
136 USE_OUTER, /* The induction variable is used outside the loop. */
137 USE_ADDRESS, /* Use in an address. */
138 USE_COMPARE /* Use is a compare. */
141 /* The candidate - cost pair. */
142 struct cost_pair
144 struct iv_cand *cand; /* The candidate. */
145 unsigned cost; /* The cost. */
146 bitmap depends_on; /* The list of invariants that have to be
147 preserved. */
150 /* Use. */
151 struct iv_use
153 unsigned id; /* The id of the use. */
154 enum use_type type; /* Type of the use. */
155 struct iv *iv; /* The induction variable it is based on. */
156 tree stmt; /* Statement in that it occurs. */
157 tree *op_p; /* The place where it occurs. */
158 bitmap related_cands; /* The set of "related" iv candidates, plus the common
159 important ones. */
161 unsigned n_map_members; /* Number of candidates in the cost_map list. */
162 struct cost_pair *cost_map;
163 /* The costs wrto the iv candidates. */
165 struct iv_cand *selected;
166 /* The selected candidate. */
169 /* The position where the iv is computed. */
170 enum iv_position
172 IP_NORMAL, /* At the end, just before the exit condition. */
173 IP_END, /* At the end of the latch block. */
174 IP_ORIGINAL /* The original biv. */
177 /* The induction variable candidate. */
178 struct iv_cand
180 unsigned id; /* The number of the candidate. */
181 bool important; /* Whether this is an "important" candidate, i.e. such
182 that it should be considered by all uses. */
183 enum iv_position pos; /* Where it is computed. */
184 tree incremented_at; /* For original biv, the statement where it is
185 incremented. */
186 tree var_before; /* The variable used for it before increment. */
187 tree var_after; /* The variable used for it after increment. */
188 struct iv *iv; /* The value of the candidate. NULL for
189 "pseudocandidate" used to indicate the possibility
190 to replace the final value of an iv by direct
191 computation of the value. */
192 unsigned cost; /* Cost of the candidate. */
195 /* The data used by the induction variable optimizations. */
197 struct ivopts_data
199 /* The currently optimized loop. */
200 struct loop *current_loop;
202 /* The size of version_info array allocated. */
203 unsigned version_info_size;
205 /* The array of information for the ssa names. */
206 struct version_info *version_info;
208 /* The bitmap of indices in version_info whose value was changed. */
209 bitmap relevant;
211 /* The maximum invariant id. */
212 unsigned max_inv_id;
214 /* The uses of induction variables. */
215 varray_type iv_uses;
217 /* The candidates. */
218 varray_type iv_candidates;
220 /* A bitmap of important candidates. */
221 bitmap important_candidates;
223 /* Whether to consider just related and important candidates when replacing a
224 use. */
225 bool consider_all_candidates;
228 /* An assignment of iv candidates to uses. */
230 struct iv_ca
232 /* The number of uses covered by the assignment. */
233 unsigned upto;
235 /* Number of uses that cannot be expressed by the candidates in the set. */
236 unsigned bad_uses;
238 /* Candidate assigned to a use, together with the related costs. */
239 struct cost_pair **cand_for_use;
241 /* Number of times each candidate is used. */
242 unsigned *n_cand_uses;
244 /* The candidates used. */
245 bitmap cands;
247 /* The number of candidates in the set. */
248 unsigned n_cands;
250 /* Total number of registers needed. */
251 unsigned n_regs;
253 /* Total cost of expressing uses. */
254 unsigned cand_use_cost;
256 /* Total cost of candidates. */
257 unsigned cand_cost;
259 /* Number of times each invariant is used. */
260 unsigned *n_invariant_uses;
262 /* Total cost of the assignment. */
263 unsigned cost;
266 /* Difference of two iv candidate assignments. */
268 struct iv_ca_delta
270 /* Changed use. */
271 struct iv_use *use;
273 /* An old assignment (for rollback purposes). */
274 struct cost_pair *old_cp;
276 /* A new assignment. */
277 struct cost_pair *new_cp;
279 /* Next change in the list. */
280 struct iv_ca_delta *next_change;
283 /* Bound on number of candidates below that all candidates are considered. */
285 #define CONSIDER_ALL_CANDIDATES_BOUND \
286 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
288 /* If there are more iv occurrences, we just give up (it is quite unlikely that
289 optimizing such a loop would help, and it would take ages). */
291 #define MAX_CONSIDERED_USES \
292 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
294 /* If there are at most this number of ivs in the set, try removing unnecessary
295 ivs from the set always. */
297 #define ALWAYS_PRUNE_CAND_SET_BOUND \
298 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
300 /* The list of trees for that the decl_rtl field must be reset is stored
301 here. */
303 static varray_type decl_rtl_to_reset;
305 /* Number of uses recorded in DATA. */
307 static inline unsigned
308 n_iv_uses (struct ivopts_data *data)
310 return VARRAY_ACTIVE_SIZE (data->iv_uses);
313 /* Ith use recorded in DATA. */
315 static inline struct iv_use *
316 iv_use (struct ivopts_data *data, unsigned i)
318 return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
321 /* Number of candidates recorded in DATA. */
323 static inline unsigned
324 n_iv_cands (struct ivopts_data *data)
326 return VARRAY_ACTIVE_SIZE (data->iv_candidates);
329 /* Ith candidate recorded in DATA. */
331 static inline struct iv_cand *
332 iv_cand (struct ivopts_data *data, unsigned i)
334 return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
337 /* The data for LOOP. */
339 static inline struct loop_data *
340 loop_data (struct loop *loop)
342 return loop->aux;
345 /* The single loop exit if it dominates the latch, NULL otherwise. */
347 static edge
348 single_dom_exit (struct loop *loop)
350 edge exit = loop->single_exit;
352 if (!exit)
353 return NULL;
355 if (!just_once_each_iteration_p (loop, exit->src))
356 return NULL;
358 return exit;
361 /* Dumps information about the induction variable IV to FILE. */
363 extern void dump_iv (FILE *, struct iv *);
364 void
365 dump_iv (FILE *file, struct iv *iv)
367 if (iv->ssa_name)
369 fprintf (file, "ssa name ");
370 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
371 fprintf (file, "\n");
374 fprintf (file, " type ");
375 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
376 fprintf (file, "\n");
378 if (iv->step)
380 fprintf (file, " base ");
381 print_generic_expr (file, iv->base, TDF_SLIM);
382 fprintf (file, "\n");
384 fprintf (file, " step ");
385 print_generic_expr (file, iv->step, TDF_SLIM);
386 fprintf (file, "\n");
388 else
390 fprintf (file, " invariant ");
391 print_generic_expr (file, iv->base, TDF_SLIM);
392 fprintf (file, "\n");
395 if (iv->base_object)
397 fprintf (file, " base object ");
398 print_generic_expr (file, iv->base_object, TDF_SLIM);
399 fprintf (file, "\n");
402 if (iv->biv_p)
403 fprintf (file, " is a biv\n");
406 /* Dumps information about the USE to FILE. */
408 extern void dump_use (FILE *, struct iv_use *);
409 void
410 dump_use (FILE *file, struct iv_use *use)
412 fprintf (file, "use %d\n", use->id);
414 switch (use->type)
416 case USE_NONLINEAR_EXPR:
417 fprintf (file, " generic\n");
418 break;
420 case USE_OUTER:
421 fprintf (file, " outside\n");
422 break;
424 case USE_ADDRESS:
425 fprintf (file, " address\n");
426 break;
428 case USE_COMPARE:
429 fprintf (file, " compare\n");
430 break;
432 default:
433 gcc_unreachable ();
436 fprintf (file, " in statement ");
437 print_generic_expr (file, use->stmt, TDF_SLIM);
438 fprintf (file, "\n");
440 fprintf (file, " at position ");
441 if (use->op_p)
442 print_generic_expr (file, *use->op_p, TDF_SLIM);
443 fprintf (file, "\n");
445 dump_iv (file, use->iv);
447 if (use->related_cands)
449 fprintf (file, " related candidates ");
450 dump_bitmap (file, use->related_cands);
454 /* Dumps information about the uses to FILE. */
456 extern void dump_uses (FILE *, struct ivopts_data *);
457 void
458 dump_uses (FILE *file, struct ivopts_data *data)
460 unsigned i;
461 struct iv_use *use;
463 for (i = 0; i < n_iv_uses (data); i++)
465 use = iv_use (data, i);
467 dump_use (file, use);
468 fprintf (file, "\n");
472 /* Dumps information about induction variable candidate CAND to FILE. */
474 extern void dump_cand (FILE *, struct iv_cand *);
475 void
476 dump_cand (FILE *file, struct iv_cand *cand)
478 struct iv *iv = cand->iv;
480 fprintf (file, "candidate %d%s\n",
481 cand->id, cand->important ? " (important)" : "");
483 if (!iv)
485 fprintf (file, " final value replacement\n");
486 return;
489 switch (cand->pos)
491 case IP_NORMAL:
492 fprintf (file, " incremented before exit test\n");
493 break;
495 case IP_END:
496 fprintf (file, " incremented at end\n");
497 break;
499 case IP_ORIGINAL:
500 fprintf (file, " original biv\n");
501 break;
504 dump_iv (file, iv);
507 /* Returns the info for ssa version VER. */
509 static inline struct version_info *
510 ver_info (struct ivopts_data *data, unsigned ver)
512 return data->version_info + ver;
515 /* Returns the info for ssa name NAME. */
517 static inline struct version_info *
518 name_info (struct ivopts_data *data, tree name)
520 return ver_info (data, SSA_NAME_VERSION (name));
523 /* Checks whether there exists number X such that X * B = A, counting modulo
524 2^BITS. */
526 static bool
527 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
528 HOST_WIDE_INT *x)
530 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
531 unsigned HOST_WIDE_INT inv, ex, val;
532 unsigned i;
534 a &= mask;
535 b &= mask;
537 /* First divide the whole equation by 2 as long as possible. */
538 while (!(a & 1) && !(b & 1))
540 a >>= 1;
541 b >>= 1;
542 bits--;
543 mask >>= 1;
546 if (!(b & 1))
548 /* If b is still even, a is odd and there is no such x. */
549 return false;
552 /* Find the inverse of b. We compute it as
553 b^(2^(bits - 1) - 1) (mod 2^bits). */
554 inv = 1;
555 ex = b;
556 for (i = 0; i < bits - 1; i++)
558 inv = (inv * ex) & mask;
559 ex = (ex * ex) & mask;
562 val = (a * inv) & mask;
564 gcc_assert (((val * b) & mask) == a);
566 if ((val >> (bits - 1)) & 1)
567 val |= ~mask;
569 *x = val;
571 return true;
574 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
575 emitted in LOOP. */
577 static bool
578 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
580 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
582 gcc_assert (bb);
584 if (sbb == loop->latch)
585 return true;
587 if (sbb != bb)
588 return false;
590 return stmt == last_stmt (bb);
593 /* Returns true if STMT if after the place where the original induction
594 variable CAND is incremented. */
596 static bool
597 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
599 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
600 basic_block stmt_bb = bb_for_stmt (stmt);
601 block_stmt_iterator bsi;
603 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
604 return false;
606 if (stmt_bb != cand_bb)
607 return true;
609 /* Scan the block from the end, since the original ivs are usually
610 incremented at the end of the loop body. */
611 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
613 if (bsi_stmt (bsi) == cand->incremented_at)
614 return false;
615 if (bsi_stmt (bsi) == stmt)
616 return true;
620 /* Returns true if STMT if after the place where the induction variable
621 CAND is incremented in LOOP. */
623 static bool
624 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
626 switch (cand->pos)
628 case IP_END:
629 return false;
631 case IP_NORMAL:
632 return stmt_after_ip_normal_pos (loop, stmt);
634 case IP_ORIGINAL:
635 return stmt_after_ip_original_pos (cand, stmt);
637 default:
638 gcc_unreachable ();
642 /* Initializes data structures used by the iv optimization pass, stored
643 in DATA. LOOPS is the loop tree. */
645 static void
646 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
648 unsigned i;
650 data->version_info_size = 2 * num_ssa_names;
651 data->version_info = xcalloc (data->version_info_size,
652 sizeof (struct version_info));
653 data->relevant = BITMAP_XMALLOC ();
654 data->important_candidates = BITMAP_XMALLOC ();
655 data->max_inv_id = 0;
657 for (i = 1; i < loops->num; i++)
658 if (loops->parray[i])
659 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
661 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
662 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
663 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
666 /* Returns a memory object to that EXPR points. In case we are able to
667 determine that it does not point to any such object, NULL is returned. */
669 static tree
670 determine_base_object (tree expr)
672 enum tree_code code = TREE_CODE (expr);
673 tree base, obj, op0, op1;
675 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
676 return NULL_TREE;
678 switch (code)
680 case INTEGER_CST:
681 return NULL_TREE;
683 case ADDR_EXPR:
684 obj = TREE_OPERAND (expr, 0);
685 base = get_base_address (obj);
687 if (!base)
688 return fold_convert (ptr_type_node, expr);
690 if (TREE_CODE (base) == INDIRECT_REF)
691 return fold_convert (ptr_type_node, TREE_OPERAND (base, 0));
693 return fold (build1 (ADDR_EXPR, ptr_type_node, base));
695 case PLUS_EXPR:
696 case MINUS_EXPR:
697 op0 = determine_base_object (TREE_OPERAND (expr, 0));
698 op1 = determine_base_object (TREE_OPERAND (expr, 1));
700 if (!op1)
701 return op0;
703 if (!op0)
704 return (code == PLUS_EXPR
705 ? op1
706 : fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));
708 return fold (build (code, ptr_type_node, op0, op1));
710 default:
711 return fold_convert (ptr_type_node, expr);
715 /* Allocates an induction variable with given initial value BASE and step STEP
716 for loop LOOP. */
718 static struct iv *
719 alloc_iv (tree base, tree step)
721 struct iv *iv = xcalloc (1, sizeof (struct iv));
723 if (step && integer_zerop (step))
724 step = NULL_TREE;
726 iv->base = base;
727 iv->base_object = determine_base_object (base);
728 iv->step = step;
729 iv->biv_p = false;
730 iv->have_use_for = false;
731 iv->use_id = 0;
732 iv->ssa_name = NULL_TREE;
734 return iv;
737 /* Sets STEP and BASE for induction variable IV. */
739 static void
740 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
742 struct version_info *info = name_info (data, iv);
744 gcc_assert (!info->iv);
746 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
747 info->iv = alloc_iv (base, step);
748 info->iv->ssa_name = iv;
751 /* Finds induction variable declaration for VAR. */
753 static struct iv *
754 get_iv (struct ivopts_data *data, tree var)
756 basic_block bb;
758 if (!name_info (data, var)->iv)
760 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
762 if (!bb
763 || !flow_bb_inside_loop_p (data->current_loop, bb))
764 set_iv (data, var, var, NULL_TREE);
767 return name_info (data, var)->iv;
770 /* Determines the step of a biv defined in PHI. */
772 static tree
773 determine_biv_step (tree phi)
775 struct loop *loop = bb_for_stmt (phi)->loop_father;
776 tree name = PHI_RESULT (phi), base, step;
777 tree type = TREE_TYPE (name);
779 if (!is_gimple_reg (name))
780 return NULL_TREE;
782 if (!simple_iv (loop, phi, name, &base, &step))
783 return NULL_TREE;
785 if (!step)
786 return build_int_cst (type, 0);
788 return step;
791 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
793 static bool
794 abnormal_ssa_name_p (tree exp)
796 if (!exp)
797 return false;
799 if (TREE_CODE (exp) != SSA_NAME)
800 return false;
802 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
805 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
806 abnormal phi node. Callback for for_each_index. */
808 static bool
809 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
810 void *data ATTRIBUTE_UNUSED)
812 if (TREE_CODE (base) == ARRAY_REF)
814 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
815 return false;
816 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
817 return false;
820 return !abnormal_ssa_name_p (*index);
823 /* Returns true if EXPR contains a ssa name that occurs in an
824 abnormal phi node. */
826 static bool
827 contains_abnormal_ssa_name_p (tree expr)
829 enum tree_code code = TREE_CODE (expr);
830 enum tree_code_class class = TREE_CODE_CLASS (code);
832 if (code == SSA_NAME)
833 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
835 if (code == INTEGER_CST
836 || is_gimple_min_invariant (expr))
837 return false;
839 if (code == ADDR_EXPR)
840 return !for_each_index (&TREE_OPERAND (expr, 0),
841 idx_contains_abnormal_ssa_name_p,
842 NULL);
844 switch (class)
846 case tcc_binary:
847 case tcc_comparison:
848 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
849 return true;
851 /* Fallthru. */
852 case tcc_unary:
853 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
854 return true;
856 break;
858 default:
859 gcc_unreachable ();
862 return false;
865 /* Finds basic ivs. */
867 static bool
868 find_bivs (struct ivopts_data *data)
870 tree phi, step, type, base;
871 bool found = false;
872 struct loop *loop = data->current_loop;
874 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
876 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
877 continue;
879 step = determine_biv_step (phi);
881 if (!step)
882 continue;
883 if (cst_and_fits_in_hwi (step)
884 && int_cst_value (step) == 0)
885 continue;
887 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
888 if (contains_abnormal_ssa_name_p (base))
889 continue;
891 type = TREE_TYPE (PHI_RESULT (phi));
892 base = fold_convert (type, base);
893 step = fold_convert (type, step);
895 /* FIXME: We do not handle induction variables whose step does
896 not satisfy cst_and_fits_in_hwi. */
897 if (!cst_and_fits_in_hwi (step))
898 continue;
900 set_iv (data, PHI_RESULT (phi), base, step);
901 found = true;
904 return found;
907 /* Marks basic ivs. */
909 static void
910 mark_bivs (struct ivopts_data *data)
912 tree phi, var;
913 struct iv *iv, *incr_iv;
914 struct loop *loop = data->current_loop;
915 basic_block incr_bb;
917 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
919 iv = get_iv (data, PHI_RESULT (phi));
920 if (!iv)
921 continue;
923 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
924 incr_iv = get_iv (data, var);
925 if (!incr_iv)
926 continue;
928 /* If the increment is in the subloop, ignore it. */
929 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
930 if (incr_bb->loop_father != data->current_loop
931 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
932 continue;
934 iv->biv_p = true;
935 incr_iv->biv_p = true;
939 /* Checks whether STMT defines a linear induction variable and stores its
940 parameters to BASE and STEP. */
942 static bool
943 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
944 tree *base, tree *step)
946 tree lhs;
947 struct loop *loop = data->current_loop;
949 *base = NULL_TREE;
950 *step = NULL_TREE;
952 if (TREE_CODE (stmt) != MODIFY_EXPR)
953 return false;
955 lhs = TREE_OPERAND (stmt, 0);
956 if (TREE_CODE (lhs) != SSA_NAME)
957 return false;
959 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
960 return false;
962 /* FIXME: We do not handle induction variables whose step does
963 not satisfy cst_and_fits_in_hwi. */
964 if (!zero_p (*step)
965 && !cst_and_fits_in_hwi (*step))
966 return false;
968 if (contains_abnormal_ssa_name_p (*base))
969 return false;
971 return true;
974 /* Finds general ivs in statement STMT. */
976 static void
977 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
979 tree base, step;
981 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
982 return;
984 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
987 /* Finds general ivs in basic block BB. */
989 static void
990 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
992 block_stmt_iterator bsi;
994 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
995 find_givs_in_stmt (data, bsi_stmt (bsi));
998 /* Finds general ivs. */
1000 static void
1001 find_givs (struct ivopts_data *data)
1003 struct loop *loop = data->current_loop;
1004 basic_block *body = get_loop_body_in_dom_order (loop);
1005 unsigned i;
1007 for (i = 0; i < loop->num_nodes; i++)
1008 find_givs_in_bb (data, body[i]);
1009 free (body);
1012 /* Determine the number of iterations of the current loop. */
1014 static void
1015 determine_number_of_iterations (struct ivopts_data *data)
1017 struct loop *loop = data->current_loop;
1018 edge exit = single_dom_exit (loop);
1020 if (!exit)
1021 return;
1023 number_of_iterations_exit (loop, exit, &loop_data (loop)->niter);
1026 /* For each ssa name defined in LOOP determines whether it is an induction
1027 variable and if so, its initial value and step. */
1029 static bool
1030 find_induction_variables (struct ivopts_data *data)
1032 unsigned i;
1033 struct loop *loop = data->current_loop;
1034 bitmap_iterator bi;
1036 if (!find_bivs (data))
1037 return false;
1039 find_givs (data);
1040 mark_bivs (data);
1041 determine_number_of_iterations (data);
1043 if (dump_file && (dump_flags & TDF_DETAILS))
1045 if (loop_data (loop)->niter.niter)
1047 fprintf (dump_file, " number of iterations ");
1048 print_generic_expr (dump_file, loop_data (loop)->niter.niter,
1049 TDF_SLIM);
1050 fprintf (dump_file, "\n");
1052 fprintf (dump_file, " may be zero if ");
1053 print_generic_expr (dump_file, loop_data (loop)->niter.may_be_zero,
1054 TDF_SLIM);
1055 fprintf (dump_file, "\n");
1057 fprintf (dump_file, " bogus unless ");
1058 print_generic_expr (dump_file, loop_data (loop)->niter.assumptions,
1059 TDF_SLIM);
1060 fprintf (dump_file, "\n");
1061 fprintf (dump_file, "\n");
1064 fprintf (dump_file, "Induction variables:\n\n");
1066 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1068 if (ver_info (data, i)->iv)
1069 dump_iv (dump_file, ver_info (data, i)->iv);
1073 return true;
1076 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1078 static struct iv_use *
1079 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1080 tree stmt, enum use_type use_type)
1082 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1084 use->id = n_iv_uses (data);
1085 use->type = use_type;
1086 use->iv = iv;
1087 use->stmt = stmt;
1088 use->op_p = use_p;
1089 use->related_cands = BITMAP_XMALLOC ();
1091 /* To avoid showing ssa name in the dumps, if it was not reset by the
1092 caller. */
1093 iv->ssa_name = NULL_TREE;
1095 if (dump_file && (dump_flags & TDF_DETAILS))
1096 dump_use (dump_file, use);
1098 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
1100 return use;
1103 /* Checks whether OP is a loop-level invariant and if so, records it.
1104 NONLINEAR_USE is true if the invariant is used in a way we do not
1105 handle specially. */
1107 static void
1108 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1110 basic_block bb;
1111 struct version_info *info;
1113 if (TREE_CODE (op) != SSA_NAME
1114 || !is_gimple_reg (op))
1115 return;
1117 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1118 if (bb
1119 && flow_bb_inside_loop_p (data->current_loop, bb))
1120 return;
1122 info = name_info (data, op);
1123 info->name = op;
1124 info->has_nonlin_use |= nonlinear_use;
1125 if (!info->inv_id)
1126 info->inv_id = ++data->max_inv_id;
1127 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1130 /* Checks whether the use OP is interesting and if so, records it
1131 as TYPE. */
1133 static struct iv_use *
1134 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1135 enum use_type type)
1137 struct iv *iv;
1138 struct iv *civ;
1139 tree stmt;
1140 struct iv_use *use;
1142 if (TREE_CODE (op) != SSA_NAME)
1143 return NULL;
1145 iv = get_iv (data, op);
1146 if (!iv)
1147 return NULL;
1149 if (iv->have_use_for)
1151 use = iv_use (data, iv->use_id);
1153 gcc_assert (use->type == USE_NONLINEAR_EXPR
1154 || use->type == USE_OUTER);
1156 if (type == USE_NONLINEAR_EXPR)
1157 use->type = USE_NONLINEAR_EXPR;
1158 return use;
1161 if (zero_p (iv->step))
1163 record_invariant (data, op, true);
1164 return NULL;
1166 iv->have_use_for = true;
1168 civ = xmalloc (sizeof (struct iv));
1169 *civ = *iv;
1171 stmt = SSA_NAME_DEF_STMT (op);
1172 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1173 || TREE_CODE (stmt) == MODIFY_EXPR);
1175 use = record_use (data, NULL, civ, stmt, type);
1176 iv->use_id = use->id;
1178 return use;
1181 /* Checks whether the use OP is interesting and if so, records it. */
1183 static struct iv_use *
1184 find_interesting_uses_op (struct ivopts_data *data, tree op)
1186 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1189 /* Records a definition of induction variable OP that is used outside of the
1190 loop. */
1192 static struct iv_use *
1193 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1195 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1198 /* Checks whether the condition *COND_P in STMT is interesting
1199 and if so, records it. */
1201 static void
1202 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1204 tree *op0_p;
1205 tree *op1_p;
1206 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1207 struct iv const_iv;
1208 tree zero = integer_zero_node;
1210 const_iv.step = NULL_TREE;
1212 if (integer_zerop (*cond_p)
1213 || integer_nonzerop (*cond_p))
1214 return;
1216 if (TREE_CODE (*cond_p) == SSA_NAME)
1218 op0_p = cond_p;
1219 op1_p = &zero;
1221 else
1223 op0_p = &TREE_OPERAND (*cond_p, 0);
1224 op1_p = &TREE_OPERAND (*cond_p, 1);
1227 if (TREE_CODE (*op0_p) == SSA_NAME)
1228 iv0 = get_iv (data, *op0_p);
1229 else
1230 iv0 = &const_iv;
1232 if (TREE_CODE (*op1_p) == SSA_NAME)
1233 iv1 = get_iv (data, *op1_p);
1234 else
1235 iv1 = &const_iv;
1237 if (/* When comparing with non-invariant value, we may not do any senseful
1238 induction variable elimination. */
1239 (!iv0 || !iv1)
1240 /* Eliminating condition based on two ivs would be nontrivial.
1241 ??? TODO -- it is not really important to handle this case. */
1242 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1244 find_interesting_uses_op (data, *op0_p);
1245 find_interesting_uses_op (data, *op1_p);
1246 return;
1249 if (zero_p (iv0->step) && zero_p (iv1->step))
1251 /* If both are invariants, this is a work for unswitching. */
1252 return;
1255 civ = xmalloc (sizeof (struct iv));
1256 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1257 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1260 /* Returns true if expression EXPR is obviously invariant in LOOP,
1261 i.e. if all its operands are defined outside of the LOOP. */
1263 bool
1264 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1266 basic_block def_bb;
1267 unsigned i, len;
1269 if (is_gimple_min_invariant (expr))
1270 return true;
1272 if (TREE_CODE (expr) == SSA_NAME)
1274 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1275 if (def_bb
1276 && flow_bb_inside_loop_p (loop, def_bb))
1277 return false;
1279 return true;
1282 if (!EXPR_P (expr))
1283 return false;
1285 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1286 for (i = 0; i < len; i++)
1287 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1288 return false;
1290 return true;
1293 /* Cumulates the steps of indices into DATA and replaces their values with the
1294 initial ones. Returns false when the value of the index cannot be determined.
1295 Callback for for_each_index. */
1297 struct ifs_ivopts_data
1299 struct ivopts_data *ivopts_data;
1300 tree stmt;
1301 tree *step_p;
1304 static bool
1305 idx_find_step (tree base, tree *idx, void *data)
1307 struct ifs_ivopts_data *dta = data;
1308 struct iv *iv;
1309 tree step, type, iv_type, iv_step, lbound, off;
1310 struct loop *loop = dta->ivopts_data->current_loop;
1312 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1313 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1314 return false;
1316 /* If base is a component ref, require that the offset of the reference
1317 be invariant. */
1318 if (TREE_CODE (base) == COMPONENT_REF)
1320 off = component_ref_field_offset (base);
1321 return expr_invariant_in_loop_p (loop, off);
1324 /* If base is array, first check whether we will be able to move the
1325 reference out of the loop (in order to take its address in strength
1326 reduction). In order for this to work we need both lower bound
1327 and step to be loop invariants. */
1328 if (TREE_CODE (base) == ARRAY_REF)
1330 step = array_ref_element_size (base);
1331 lbound = array_ref_low_bound (base);
1333 if (!expr_invariant_in_loop_p (loop, step)
1334 || !expr_invariant_in_loop_p (loop, lbound))
1335 return false;
1338 if (TREE_CODE (*idx) != SSA_NAME)
1339 return true;
1341 iv = get_iv (dta->ivopts_data, *idx);
1342 if (!iv)
1343 return false;
1345 *idx = iv->base;
1347 if (!iv->step)
1348 return true;
1350 iv_type = TREE_TYPE (iv->base);
1351 type = build_pointer_type (TREE_TYPE (base));
1352 if (TREE_CODE (base) == ARRAY_REF)
1354 step = array_ref_element_size (base);
1356 /* We only handle addresses whose step is an integer constant. */
1357 if (TREE_CODE (step) != INTEGER_CST)
1358 return false;
1360 else
1361 /* The step for pointer arithmetics already is 1 byte. */
1362 step = build_int_cst (type, 1);
1364 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1365 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1366 type, iv->base, iv->step, dta->stmt);
1367 else
1368 iv_step = fold_convert (iv_type, iv->step);
1370 if (!iv_step)
1372 /* The index might wrap. */
1373 return false;
1376 step = fold_binary_to_constant (MULT_EXPR, type, step, iv_step);
1378 if (!*dta->step_p)
1379 *dta->step_p = step;
1380 else
1381 *dta->step_p = fold_binary_to_constant (PLUS_EXPR, type,
1382 *dta->step_p, step);
1384 return true;
1387 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1388 object is passed to it in DATA. */
1390 static bool
1391 idx_record_use (tree base, tree *idx,
1392 void *data)
1394 find_interesting_uses_op (data, *idx);
1395 if (TREE_CODE (base) == ARRAY_REF)
1397 find_interesting_uses_op (data, array_ref_element_size (base));
1398 find_interesting_uses_op (data, array_ref_low_bound (base));
1400 return true;
1403 /* Finds addresses in *OP_P inside STMT. */
1405 static void
1406 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1408 tree base = unshare_expr (*op_p), step = NULL;
1409 struct iv *civ;
1410 struct ifs_ivopts_data ifs_ivopts_data;
1412 /* Ignore bitfields for now. Not really something terribly complicated
1413 to handle. TODO. */
1414 if (TREE_CODE (base) == COMPONENT_REF
1415 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1416 goto fail;
1418 ifs_ivopts_data.ivopts_data = data;
1419 ifs_ivopts_data.stmt = stmt;
1420 ifs_ivopts_data.step_p = &step;
1421 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1422 || zero_p (step))
1423 goto fail;
1425 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1426 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1428 if (TREE_CODE (base) == INDIRECT_REF)
1429 base = TREE_OPERAND (base, 0);
1430 else
1431 base = build_addr (base);
1433 civ = alloc_iv (base, step);
1434 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1435 return;
1437 fail:
1438 for_each_index (op_p, idx_record_use, data);
1441 /* Finds and records invariants used in STMT. */
1443 static void
1444 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1446 use_optype uses = NULL;
1447 unsigned i, n;
1448 tree op;
1450 if (TREE_CODE (stmt) == PHI_NODE)
1451 n = PHI_NUM_ARGS (stmt);
1452 else
1454 get_stmt_operands (stmt);
1455 uses = STMT_USE_OPS (stmt);
1456 n = NUM_USES (uses);
1459 for (i = 0; i < n; i++)
1461 if (TREE_CODE (stmt) == PHI_NODE)
1462 op = PHI_ARG_DEF (stmt, i);
1463 else
1464 op = USE_OP (uses, i);
1466 record_invariant (data, op, false);
1470 /* Finds interesting uses of induction variables in the statement STMT. */
1472 static void
1473 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1475 struct iv *iv;
1476 tree op, lhs, rhs;
1477 use_optype uses = NULL;
1478 unsigned i, n;
1480 find_invariants_stmt (data, stmt);
1482 if (TREE_CODE (stmt) == COND_EXPR)
1484 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1485 return;
1488 if (TREE_CODE (stmt) == MODIFY_EXPR)
1490 lhs = TREE_OPERAND (stmt, 0);
1491 rhs = TREE_OPERAND (stmt, 1);
1493 if (TREE_CODE (lhs) == SSA_NAME)
1495 /* If the statement defines an induction variable, the uses are not
1496 interesting by themselves. */
1498 iv = get_iv (data, lhs);
1500 if (iv && !zero_p (iv->step))
1501 return;
1504 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1506 case tcc_comparison:
1507 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1508 return;
1510 case tcc_reference:
1511 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1512 if (REFERENCE_CLASS_P (lhs))
1513 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1514 return;
1516 default: ;
1519 if (REFERENCE_CLASS_P (lhs)
1520 && is_gimple_val (rhs))
1522 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1523 find_interesting_uses_op (data, rhs);
1524 return;
1527 /* TODO -- we should also handle address uses of type
1529 memory = call (whatever);
1533 call (memory). */
1536 if (TREE_CODE (stmt) == PHI_NODE
1537 && bb_for_stmt (stmt) == data->current_loop->header)
1539 lhs = PHI_RESULT (stmt);
1540 iv = get_iv (data, lhs);
1542 if (iv && !zero_p (iv->step))
1543 return;
1546 if (TREE_CODE (stmt) == PHI_NODE)
1547 n = PHI_NUM_ARGS (stmt);
1548 else
1550 uses = STMT_USE_OPS (stmt);
1551 n = NUM_USES (uses);
1554 for (i = 0; i < n; i++)
1556 if (TREE_CODE (stmt) == PHI_NODE)
1557 op = PHI_ARG_DEF (stmt, i);
1558 else
1559 op = USE_OP (uses, i);
1561 if (TREE_CODE (op) != SSA_NAME)
1562 continue;
1564 iv = get_iv (data, op);
1565 if (!iv)
1566 continue;
1568 find_interesting_uses_op (data, op);
1572 /* Finds interesting uses of induction variables outside of loops
1573 on loop exit edge EXIT. */
1575 static void
1576 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1578 tree phi, def;
1580 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1582 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1583 find_interesting_uses_outer (data, def);
1587 /* Finds uses of the induction variables that are interesting. */
1589 static void
1590 find_interesting_uses (struct ivopts_data *data)
1592 basic_block bb;
1593 block_stmt_iterator bsi;
1594 tree phi;
1595 basic_block *body = get_loop_body (data->current_loop);
1596 unsigned i;
1597 struct version_info *info;
1598 edge e;
1600 if (dump_file && (dump_flags & TDF_DETAILS))
1601 fprintf (dump_file, "Uses:\n\n");
1603 for (i = 0; i < data->current_loop->num_nodes; i++)
1605 edge_iterator ei;
1606 bb = body[i];
1608 FOR_EACH_EDGE (e, ei, bb->succs)
1609 if (e->dest != EXIT_BLOCK_PTR
1610 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1611 find_interesting_uses_outside (data, e);
1613 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1614 find_interesting_uses_stmt (data, phi);
1615 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1616 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1619 if (dump_file && (dump_flags & TDF_DETAILS))
1621 bitmap_iterator bi;
1623 fprintf (dump_file, "\n");
1625 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1627 info = ver_info (data, i);
1628 if (info->inv_id)
1630 fprintf (dump_file, " ");
1631 print_generic_expr (dump_file, info->name, TDF_SLIM);
1632 fprintf (dump_file, " is invariant (%d)%s\n",
1633 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1637 fprintf (dump_file, "\n");
1640 free (body);
1643 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1644 position to POS. If USE is not NULL, the candidate is set as related to
1645 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1646 replacement of the final value of the iv by a direct computation. */
1648 static struct iv_cand *
1649 add_candidate_1 (struct ivopts_data *data,
1650 tree base, tree step, bool important, enum iv_position pos,
1651 struct iv_use *use, tree incremented_at)
1653 unsigned i;
1654 struct iv_cand *cand = NULL;
1655 tree type;
1657 if (base)
1659 type = TREE_TYPE (base);
1660 if (!TYPE_UNSIGNED (type))
1662 type = unsigned_type_for (type);
1663 base = fold_convert (type, base);
1664 if (step)
1665 step = fold_convert (type, step);
1669 for (i = 0; i < n_iv_cands (data); i++)
1671 cand = iv_cand (data, i);
1673 if (cand->pos != pos)
1674 continue;
1676 if (cand->incremented_at != incremented_at)
1677 continue;
1679 if (!cand->iv)
1681 if (!base && !step)
1682 break;
1684 continue;
1687 if (!base && !step)
1688 continue;
1690 if (!operand_equal_p (base, cand->iv->base, 0))
1691 continue;
1693 if (zero_p (cand->iv->step))
1695 if (zero_p (step))
1696 break;
1698 else
1700 if (step && operand_equal_p (step, cand->iv->step, 0))
1701 break;
1705 if (i == n_iv_cands (data))
1707 cand = xcalloc (1, sizeof (struct iv_cand));
1708 cand->id = i;
1710 if (!base && !step)
1711 cand->iv = NULL;
1712 else
1713 cand->iv = alloc_iv (base, step);
1715 cand->pos = pos;
1716 if (pos != IP_ORIGINAL && cand->iv)
1718 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1719 cand->var_after = cand->var_before;
1721 cand->important = important;
1722 cand->incremented_at = incremented_at;
1723 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1725 if (dump_file && (dump_flags & TDF_DETAILS))
1726 dump_cand (dump_file, cand);
1729 if (important && !cand->important)
1731 cand->important = true;
1732 if (dump_file && (dump_flags & TDF_DETAILS))
1733 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1736 if (use)
1738 bitmap_set_bit (use->related_cands, i);
1739 if (dump_file && (dump_flags & TDF_DETAILS))
1740 fprintf (dump_file, "Candidate %d is related to use %d\n",
1741 cand->id, use->id);
1744 return cand;
1747 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1748 position to POS. If USE is not NULL, the candidate is set as related to
1749 it. The candidate computation is scheduled on all available positions. */
1751 static void
1752 add_candidate (struct ivopts_data *data,
1753 tree base, tree step, bool important, struct iv_use *use)
1755 if (ip_normal_pos (data->current_loop))
1756 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
1757 if (ip_end_pos (data->current_loop))
1758 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
1761 /* Adds standard iv candidates. */
1763 static void
1764 add_standard_iv_candidates (struct ivopts_data *data)
1766 /* Add 0 + 1 * iteration candidate. */
1767 add_candidate (data,
1768 build_int_cst (unsigned_intSI_type_node, 0),
1769 build_int_cst (unsigned_intSI_type_node, 1),
1770 true, NULL);
1772 /* The same for a long type if it is still fast enough. */
1773 if (BITS_PER_WORD > 32)
1774 add_candidate (data,
1775 build_int_cst (unsigned_intDI_type_node, 0),
1776 build_int_cst (unsigned_intDI_type_node, 1),
1777 true, NULL);
1781 /* Adds candidates bases on the old induction variable IV. */
1783 static void
1784 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
1786 tree phi, def;
1787 struct iv_cand *cand;
1789 add_candidate (data, iv->base, iv->step, true, NULL);
1791 /* The same, but with initial value zero. */
1792 add_candidate (data,
1793 build_int_cst (TREE_TYPE (iv->base), 0),
1794 iv->step, true, NULL);
1796 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
1797 if (TREE_CODE (phi) == PHI_NODE)
1799 /* Additionally record the possibility of leaving the original iv
1800 untouched. */
1801 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
1802 cand = add_candidate_1 (data,
1803 iv->base, iv->step, true, IP_ORIGINAL, NULL,
1804 SSA_NAME_DEF_STMT (def));
1805 cand->var_before = iv->ssa_name;
1806 cand->var_after = def;
1810 /* Adds candidates based on the old induction variables. */
1812 static void
1813 add_old_ivs_candidates (struct ivopts_data *data)
1815 unsigned i;
1816 struct iv *iv;
1817 bitmap_iterator bi;
1819 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1821 iv = ver_info (data, i)->iv;
1822 if (iv && iv->biv_p && !zero_p (iv->step))
1823 add_old_iv_candidates (data, iv);
1827 /* Adds candidates based on the value of the induction variable IV and USE. */
1829 static void
1830 add_iv_value_candidates (struct ivopts_data *data,
1831 struct iv *iv, struct iv_use *use)
1833 add_candidate (data, iv->base, iv->step, false, use);
1835 /* The same, but with initial value zero. */
1836 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
1837 iv->step, false, use);
1840 /* Adds candidates based on the address IV and USE. */
1842 static void
1843 add_address_candidates (struct ivopts_data *data,
1844 struct iv *iv, struct iv_use *use)
1846 tree base, abase, tmp, *act;
1848 /* First, the trivial choices. */
1849 add_iv_value_candidates (data, iv, use);
1851 /* Second, try removing the COMPONENT_REFs. */
1852 if (TREE_CODE (iv->base) == ADDR_EXPR)
1854 base = TREE_OPERAND (iv->base, 0);
1855 while (TREE_CODE (base) == COMPONENT_REF
1856 || (TREE_CODE (base) == ARRAY_REF
1857 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
1858 base = TREE_OPERAND (base, 0);
1860 if (base != TREE_OPERAND (iv->base, 0))
1862 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1863 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1865 if (TREE_CODE (base) == INDIRECT_REF)
1866 base = TREE_OPERAND (base, 0);
1867 else
1868 base = build_addr (base);
1869 add_candidate (data, base, iv->step, false, use);
1873 /* Third, try removing the constant offset. */
1874 abase = iv->base;
1875 while (TREE_CODE (abase) == PLUS_EXPR
1876 && TREE_CODE (TREE_OPERAND (abase, 1)) != INTEGER_CST)
1877 abase = TREE_OPERAND (abase, 0);
1878 /* We found the offset, so make the copy of the non-shared part and
1879 remove it. */
1880 if (TREE_CODE (abase) == PLUS_EXPR)
1882 tmp = iv->base;
1883 act = &base;
1885 for (tmp = iv->base; tmp != abase; tmp = TREE_OPERAND (tmp, 0))
1887 *act = build2 (PLUS_EXPR, TREE_TYPE (tmp),
1888 NULL_TREE, TREE_OPERAND (tmp, 1));
1889 act = &TREE_OPERAND (*act, 0);
1891 *act = TREE_OPERAND (tmp, 0);
1893 add_candidate (data, base, iv->step, false, use);
1897 /* Possibly adds pseudocandidate for replacing the final value of USE by
1898 a direct computation. */
1900 static void
1901 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
1903 struct tree_niter_desc *niter;
1904 struct loop *loop = data->current_loop;
1906 /* We must know where we exit the loop and how many times does it roll. */
1907 if (!single_dom_exit (loop))
1908 return;
1910 niter = &loop_data (loop)->niter;
1911 if (!niter->niter
1912 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
1913 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
1914 return;
1916 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
1919 /* Adds candidates based on the uses. */
1921 static void
1922 add_derived_ivs_candidates (struct ivopts_data *data)
1924 unsigned i;
1926 for (i = 0; i < n_iv_uses (data); i++)
1928 struct iv_use *use = iv_use (data, i);
1930 if (!use)
1931 continue;
1933 switch (use->type)
1935 case USE_NONLINEAR_EXPR:
1936 case USE_COMPARE:
1937 /* Just add the ivs based on the value of the iv used here. */
1938 add_iv_value_candidates (data, use->iv, use);
1939 break;
1941 case USE_OUTER:
1942 add_iv_value_candidates (data, use->iv, use);
1944 /* Additionally, add the pseudocandidate for the possibility to
1945 replace the final value by a direct computation. */
1946 add_iv_outer_candidates (data, use);
1947 break;
1949 case USE_ADDRESS:
1950 add_address_candidates (data, use->iv, use);
1951 break;
1953 default:
1954 gcc_unreachable ();
1959 /* Record important candidates and add them to related_cands bitmaps
1960 if needed. */
1962 static void
1963 record_important_candidates (struct ivopts_data *data)
1965 unsigned i;
1966 struct iv_use *use;
1968 for (i = 0; i < n_iv_cands (data); i++)
1970 struct iv_cand *cand = iv_cand (data, i);
1972 if (cand->important)
1973 bitmap_set_bit (data->important_candidates, i);
1976 data->consider_all_candidates = (n_iv_cands (data)
1977 <= CONSIDER_ALL_CANDIDATES_BOUND);
1979 if (data->consider_all_candidates)
1981 /* We will not need "related_cands" bitmaps in this case,
1982 so release them to decrease peak memory consumption. */
1983 for (i = 0; i < n_iv_uses (data); i++)
1985 use = iv_use (data, i);
1986 BITMAP_XFREE (use->related_cands);
1989 else
1991 /* Add important candidates to the related_cands bitmaps. */
1992 for (i = 0; i < n_iv_uses (data); i++)
1993 bitmap_ior_into (iv_use (data, i)->related_cands,
1994 data->important_candidates);
1998 /* Finds the candidates for the induction variables. */
2000 static void
2001 find_iv_candidates (struct ivopts_data *data)
2003 /* Add commonly used ivs. */
2004 add_standard_iv_candidates (data);
2006 /* Add old induction variables. */
2007 add_old_ivs_candidates (data);
2009 /* Add induction variables derived from uses. */
2010 add_derived_ivs_candidates (data);
2012 /* Record the important candidates. */
2013 record_important_candidates (data);
2016 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2017 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2018 we allocate a simple list to every use. */
2020 static void
2021 alloc_use_cost_map (struct ivopts_data *data)
2023 unsigned i, size, s, j;
2025 for (i = 0; i < n_iv_uses (data); i++)
2027 struct iv_use *use = iv_use (data, i);
2028 bitmap_iterator bi;
2030 if (data->consider_all_candidates)
2031 size = n_iv_cands (data);
2032 else
2034 s = 0;
2035 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2037 s++;
2040 /* Round up to the power of two, so that moduling by it is fast. */
2041 for (size = 1; size < s; size <<= 1)
2042 continue;
2045 use->n_map_members = size;
2046 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2050 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2051 on invariants DEPENDS_ON. */
2053 static void
2054 set_use_iv_cost (struct ivopts_data *data,
2055 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2056 bitmap depends_on)
2058 unsigned i, s;
2060 if (cost == INFTY)
2062 BITMAP_XFREE (depends_on);
2063 return;
2066 if (data->consider_all_candidates)
2068 use->cost_map[cand->id].cand = cand;
2069 use->cost_map[cand->id].cost = cost;
2070 use->cost_map[cand->id].depends_on = depends_on;
2071 return;
2074 /* n_map_members is a power of two, so this computes modulo. */
2075 s = cand->id & (use->n_map_members - 1);
2076 for (i = s; i < use->n_map_members; i++)
2077 if (!use->cost_map[i].cand)
2078 goto found;
2079 for (i = 0; i < s; i++)
2080 if (!use->cost_map[i].cand)
2081 goto found;
2083 gcc_unreachable ();
2085 found:
2086 use->cost_map[i].cand = cand;
2087 use->cost_map[i].cost = cost;
2088 use->cost_map[i].depends_on = depends_on;
2091 /* Gets cost of (USE, CANDIDATE) pair. */
2093 static struct cost_pair *
2094 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2095 struct iv_cand *cand)
2097 unsigned i, s;
2098 struct cost_pair *ret;
2100 if (!cand)
2101 return NULL;
2103 if (data->consider_all_candidates)
2105 ret = use->cost_map + cand->id;
2106 if (!ret->cand)
2107 return NULL;
2109 return ret;
2112 /* n_map_members is a power of two, so this computes modulo. */
2113 s = cand->id & (use->n_map_members - 1);
2114 for (i = s; i < use->n_map_members; i++)
2115 if (use->cost_map[i].cand == cand)
2116 return use->cost_map + i;
2118 for (i = 0; i < s; i++)
2119 if (use->cost_map[i].cand == cand)
2120 return use->cost_map + i;
2122 return NULL;
2125 /* Returns estimate on cost of computing SEQ. */
2127 static unsigned
2128 seq_cost (rtx seq)
2130 unsigned cost = 0;
2131 rtx set;
2133 for (; seq; seq = NEXT_INSN (seq))
2135 set = single_set (seq);
2136 if (set)
2137 cost += rtx_cost (set, SET);
2138 else
2139 cost++;
2142 return cost;
2145 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2146 static rtx
2147 produce_memory_decl_rtl (tree obj, int *regno)
2149 rtx x;
2150 if (!obj)
2151 abort ();
2152 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2154 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2155 x = gen_rtx_SYMBOL_REF (Pmode, name);
2157 else
2158 x = gen_raw_REG (Pmode, (*regno)++);
2160 return gen_rtx_MEM (DECL_MODE (obj), x);
2163 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2164 walk_tree. DATA contains the actual fake register number. */
2166 static tree
2167 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2169 tree obj = NULL_TREE;
2170 rtx x = NULL_RTX;
2171 int *regno = data;
2173 switch (TREE_CODE (*expr_p))
2175 case ADDR_EXPR:
2176 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2177 handled_component_p (*expr_p);
2178 expr_p = &TREE_OPERAND (*expr_p, 0))
2179 continue;
2180 obj = *expr_p;
2181 if (DECL_P (obj))
2182 x = produce_memory_decl_rtl (obj, regno);
2183 break;
2185 case SSA_NAME:
2186 *ws = 0;
2187 obj = SSA_NAME_VAR (*expr_p);
2188 if (!DECL_RTL_SET_P (obj))
2189 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2190 break;
2192 case VAR_DECL:
2193 case PARM_DECL:
2194 case RESULT_DECL:
2195 *ws = 0;
2196 obj = *expr_p;
2198 if (DECL_RTL_SET_P (obj))
2199 break;
2201 if (DECL_MODE (obj) == BLKmode)
2202 x = produce_memory_decl_rtl (obj, regno);
2203 else
2204 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2206 break;
2208 default:
2209 break;
2212 if (x)
2214 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
2215 SET_DECL_RTL (obj, x);
2218 return NULL_TREE;
2221 /* Determines cost of the computation of EXPR. */
2223 static unsigned
2224 computation_cost (tree expr)
2226 rtx seq, rslt;
2227 tree type = TREE_TYPE (expr);
2228 unsigned cost;
2229 int regno = 0;
2231 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2232 start_sequence ();
2233 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2234 seq = get_insns ();
2235 end_sequence ();
2237 cost = seq_cost (seq);
2238 if (GET_CODE (rslt) == MEM)
2239 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2241 return cost;
2244 /* Returns variable containing the value of candidate CAND at statement AT. */
2246 static tree
2247 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2249 if (stmt_after_increment (loop, cand, stmt))
2250 return cand->var_after;
2251 else
2252 return cand->var_before;
2255 /* Determines the expression by that USE is expressed from induction variable
2256 CAND at statement AT in LOOP. */
2258 static tree
2259 get_computation_at (struct loop *loop,
2260 struct iv_use *use, struct iv_cand *cand, tree at)
2262 tree ubase = use->iv->base;
2263 tree ustep = use->iv->step;
2264 tree cbase = cand->iv->base;
2265 tree cstep = cand->iv->step;
2266 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2267 tree uutype;
2268 tree expr, delta;
2269 tree ratio;
2270 unsigned HOST_WIDE_INT ustepi, cstepi;
2271 HOST_WIDE_INT ratioi;
2273 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2275 /* We do not have a precision to express the values of use. */
2276 return NULL_TREE;
2279 expr = var_at_stmt (loop, cand, at);
2281 if (TREE_TYPE (expr) != ctype)
2283 /* This may happen with the original ivs. */
2284 expr = fold_convert (ctype, expr);
2287 if (TYPE_UNSIGNED (utype))
2288 uutype = utype;
2289 else
2291 uutype = unsigned_type_for (utype);
2292 ubase = fold_convert (uutype, ubase);
2293 ustep = fold_convert (uutype, ustep);
2296 if (uutype != ctype)
2298 expr = fold_convert (uutype, expr);
2299 cbase = fold_convert (uutype, cbase);
2300 cstep = fold_convert (uutype, cstep);
2303 if (!cst_and_fits_in_hwi (cstep)
2304 || !cst_and_fits_in_hwi (ustep))
2305 return NULL_TREE;
2307 ustepi = int_cst_value (ustep);
2308 cstepi = int_cst_value (cstep);
2310 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2312 /* TODO maybe consider case when ustep divides cstep and the ratio is
2313 a power of 2 (so that the division is fast to execute)? We would
2314 need to be much more careful with overflows etc. then. */
2315 return NULL_TREE;
2318 /* We may need to shift the value if we are after the increment. */
2319 if (stmt_after_increment (loop, cand, at))
2320 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2322 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2323 or |ratio| == 1, it is better to handle this like
2325 ubase - ratio * cbase + ratio * var. */
2327 if (ratioi == 1)
2329 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2330 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2332 else if (ratioi == -1)
2334 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2335 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2337 else if (TREE_CODE (cbase) == INTEGER_CST)
2339 ratio = build_int_cst_type (uutype, ratioi);
2340 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2341 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2342 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2343 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2345 else
2347 expr = fold (build2 (MINUS_EXPR, uutype, expr, cbase));
2348 ratio = build_int_cst_type (uutype, ratioi);
2349 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2350 expr = fold (build2 (PLUS_EXPR, uutype, ubase, expr));
2353 return fold_convert (utype, expr);
2356 /* Determines the expression by that USE is expressed from induction variable
2357 CAND in LOOP. */
2359 static tree
2360 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2362 return get_computation_at (loop, use, cand, use->stmt);
2365 /* Strips constant offsets from EXPR and adds them to OFFSET. */
2367 static void
2368 strip_offset (tree *expr, unsigned HOST_WIDE_INT *offset)
2370 tree op0, op1;
2371 enum tree_code code;
2373 while (1)
2375 if (cst_and_fits_in_hwi (*expr))
2377 *offset += int_cst_value (*expr);
2378 *expr = integer_zero_node;
2379 return;
2382 code = TREE_CODE (*expr);
2384 if (code != PLUS_EXPR && code != MINUS_EXPR)
2385 return;
2387 op0 = TREE_OPERAND (*expr, 0);
2388 op1 = TREE_OPERAND (*expr, 1);
2390 if (cst_and_fits_in_hwi (op1))
2392 if (code == PLUS_EXPR)
2393 *offset += int_cst_value (op1);
2394 else
2395 *offset -= int_cst_value (op1);
2397 *expr = op0;
2398 continue;
2401 if (code != PLUS_EXPR)
2402 return;
2404 if (!cst_and_fits_in_hwi (op0))
2405 return;
2407 *offset += int_cst_value (op0);
2408 *expr = op1;
2412 /* Returns cost of addition in MODE. */
2414 static unsigned
2415 add_cost (enum machine_mode mode)
2417 static unsigned costs[NUM_MACHINE_MODES];
2418 rtx seq;
2419 unsigned cost;
2421 if (costs[mode])
2422 return costs[mode];
2424 start_sequence ();
2425 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2426 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2427 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2428 NULL_RTX);
2429 seq = get_insns ();
2430 end_sequence ();
2432 cost = seq_cost (seq);
2433 if (!cost)
2434 cost = 1;
2436 costs[mode] = cost;
2438 if (dump_file && (dump_flags & TDF_DETAILS))
2439 fprintf (dump_file, "Addition in %s costs %d\n",
2440 GET_MODE_NAME (mode), cost);
2441 return cost;
2444 /* Entry in a hashtable of already known costs for multiplication. */
2445 struct mbc_entry
2447 HOST_WIDE_INT cst; /* The constant to multiply by. */
2448 enum machine_mode mode; /* In mode. */
2449 unsigned cost; /* The cost. */
2452 /* Counts hash value for the ENTRY. */
2454 static hashval_t
2455 mbc_entry_hash (const void *entry)
2457 const struct mbc_entry *e = entry;
2459 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2462 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2464 static int
2465 mbc_entry_eq (const void *entry1, const void *entry2)
2467 const struct mbc_entry *e1 = entry1;
2468 const struct mbc_entry *e2 = entry2;
2470 return (e1->mode == e2->mode
2471 && e1->cst == e2->cst);
2474 /* Returns cost of multiplication by constant CST in MODE. */
2476 static unsigned
2477 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2479 static htab_t costs;
2480 struct mbc_entry **cached, act;
2481 rtx seq;
2482 unsigned cost;
2484 if (!costs)
2485 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2487 act.mode = mode;
2488 act.cst = cst;
2489 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2490 if (*cached)
2491 return (*cached)->cost;
2493 *cached = xmalloc (sizeof (struct mbc_entry));
2494 (*cached)->mode = mode;
2495 (*cached)->cst = cst;
2497 start_sequence ();
2498 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2499 NULL_RTX, 0);
2500 seq = get_insns ();
2501 end_sequence ();
2503 cost = seq_cost (seq);
2505 if (dump_file && (dump_flags & TDF_DETAILS))
2506 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2507 (int) cst, GET_MODE_NAME (mode), cost);
2509 (*cached)->cost = cost;
2511 return cost;
2514 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2515 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2516 variable is omitted. The created memory accesses MODE.
2518 TODO -- there must be some better way. This all is quite crude. */
2520 static unsigned
2521 get_address_cost (bool symbol_present, bool var_present,
2522 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2524 #define MAX_RATIO 128
2525 static sbitmap valid_mult;
2526 static HOST_WIDE_INT rat, off;
2527 static HOST_WIDE_INT min_offset, max_offset;
2528 static unsigned costs[2][2][2][2];
2529 unsigned cost, acost;
2530 rtx seq, addr, base;
2531 bool offset_p, ratio_p;
2532 rtx reg1;
2533 HOST_WIDE_INT s_offset;
2534 unsigned HOST_WIDE_INT mask;
2535 unsigned bits;
2537 if (!valid_mult)
2539 HOST_WIDE_INT i;
2541 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2543 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2544 for (i = 1; i <= 1 << 20; i <<= 1)
2546 XEXP (addr, 1) = GEN_INT (i);
2547 if (!memory_address_p (Pmode, addr))
2548 break;
2550 max_offset = i >> 1;
2551 off = max_offset;
2553 for (i = 1; i <= 1 << 20; i <<= 1)
2555 XEXP (addr, 1) = GEN_INT (-i);
2556 if (!memory_address_p (Pmode, addr))
2557 break;
2559 min_offset = -(i >> 1);
2561 if (dump_file && (dump_flags & TDF_DETAILS))
2563 fprintf (dump_file, "get_address_cost:\n");
2564 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2565 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2568 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2569 sbitmap_zero (valid_mult);
2570 rat = 1;
2571 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2572 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2574 XEXP (addr, 1) = GEN_INT (i);
2575 if (memory_address_p (Pmode, addr))
2577 SET_BIT (valid_mult, i + MAX_RATIO);
2578 rat = i;
2582 if (dump_file && (dump_flags & TDF_DETAILS))
2584 fprintf (dump_file, " allowed multipliers:");
2585 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2586 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2587 fprintf (dump_file, " %d", (int) i);
2588 fprintf (dump_file, "\n");
2589 fprintf (dump_file, "\n");
2593 bits = GET_MODE_BITSIZE (Pmode);
2594 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2595 offset &= mask;
2596 if ((offset >> (bits - 1) & 1))
2597 offset |= ~mask;
2598 s_offset = offset;
2600 cost = 0;
2601 offset_p = (s_offset != 0
2602 && min_offset <= s_offset && s_offset <= max_offset);
2603 ratio_p = (ratio != 1
2604 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2605 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2607 if (ratio != 1 && !ratio_p)
2608 cost += multiply_by_cost (ratio, Pmode);
2610 if (s_offset && !offset_p && !symbol_present)
2612 cost += add_cost (Pmode);
2613 var_present = true;
2616 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2617 if (!acost)
2619 acost = 0;
2621 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2622 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2623 if (ratio_p)
2624 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2626 if (var_present)
2627 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2629 if (symbol_present)
2631 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2632 if (offset_p)
2633 base = gen_rtx_fmt_e (CONST, Pmode,
2634 gen_rtx_fmt_ee (PLUS, Pmode,
2635 base,
2636 GEN_INT (off)));
2638 else if (offset_p)
2639 base = GEN_INT (off);
2640 else
2641 base = NULL_RTX;
2643 if (base)
2644 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2646 start_sequence ();
2647 addr = memory_address (Pmode, addr);
2648 seq = get_insns ();
2649 end_sequence ();
2651 acost = seq_cost (seq);
2652 acost += address_cost (addr, Pmode);
2654 if (!acost)
2655 acost = 1;
2656 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2659 return cost + acost;
2662 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2663 the bitmap to that we should store it. */
2665 static struct ivopts_data *fd_ivopts_data;
2666 static tree
2667 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2669 bitmap *depends_on = data;
2670 struct version_info *info;
2672 if (TREE_CODE (*expr_p) != SSA_NAME)
2673 return NULL_TREE;
2674 info = name_info (fd_ivopts_data, *expr_p);
2676 if (!info->inv_id || info->has_nonlin_use)
2677 return NULL_TREE;
2679 if (!*depends_on)
2680 *depends_on = BITMAP_XMALLOC ();
2681 bitmap_set_bit (*depends_on, info->inv_id);
2683 return NULL_TREE;
2686 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
2687 invariants the computation depends on. */
2689 static unsigned
2690 force_var_cost (struct ivopts_data *data,
2691 tree expr, bitmap *depends_on)
2693 static bool costs_initialized = false;
2694 static unsigned integer_cost;
2695 static unsigned symbol_cost;
2696 static unsigned address_cost;
2697 tree op0, op1;
2698 unsigned cost0, cost1, cost;
2699 enum machine_mode mode;
2701 if (!costs_initialized)
2703 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2704 rtx x = gen_rtx_MEM (DECL_MODE (var),
2705 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2706 tree addr;
2707 tree type = build_pointer_type (integer_type_node);
2709 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2710 2000));
2712 SET_DECL_RTL (var, x);
2713 TREE_STATIC (var) = 1;
2714 addr = build1 (ADDR_EXPR, type, var);
2715 symbol_cost = computation_cost (addr) + 1;
2717 address_cost
2718 = computation_cost (build2 (PLUS_EXPR, type,
2719 addr,
2720 build_int_cst_type (type, 2000))) + 1;
2721 if (dump_file && (dump_flags & TDF_DETAILS))
2723 fprintf (dump_file, "force_var_cost:\n");
2724 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2725 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2726 fprintf (dump_file, " address %d\n", (int) address_cost);
2727 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2728 fprintf (dump_file, "\n");
2731 costs_initialized = true;
2734 if (depends_on)
2736 fd_ivopts_data = data;
2737 walk_tree (&expr, find_depends, depends_on, NULL);
2740 if (SSA_VAR_P (expr))
2741 return 0;
2743 if (TREE_INVARIANT (expr))
2745 if (TREE_CODE (expr) == INTEGER_CST)
2746 return integer_cost;
2748 if (TREE_CODE (expr) == ADDR_EXPR)
2750 tree obj = TREE_OPERAND (expr, 0);
2752 if (TREE_CODE (obj) == VAR_DECL
2753 || TREE_CODE (obj) == PARM_DECL
2754 || TREE_CODE (obj) == RESULT_DECL)
2755 return symbol_cost;
2758 return address_cost;
2761 switch (TREE_CODE (expr))
2763 case PLUS_EXPR:
2764 case MINUS_EXPR:
2765 case MULT_EXPR:
2766 op0 = TREE_OPERAND (expr, 0);
2767 op1 = TREE_OPERAND (expr, 1);
2769 if (is_gimple_val (op0))
2770 cost0 = 0;
2771 else
2772 cost0 = force_var_cost (data, op0, NULL);
2774 if (is_gimple_val (op1))
2775 cost1 = 0;
2776 else
2777 cost1 = force_var_cost (data, op1, NULL);
2779 break;
2781 default:
2782 /* Just an arbitrary value, FIXME. */
2783 return target_spill_cost;
2786 mode = TYPE_MODE (TREE_TYPE (expr));
2787 switch (TREE_CODE (expr))
2789 case PLUS_EXPR:
2790 case MINUS_EXPR:
2791 cost = add_cost (mode);
2792 break;
2794 case MULT_EXPR:
2795 if (cst_and_fits_in_hwi (op0))
2796 cost = multiply_by_cost (int_cst_value (op0), mode);
2797 else if (cst_and_fits_in_hwi (op1))
2798 cost = multiply_by_cost (int_cst_value (op1), mode);
2799 else
2800 return target_spill_cost;
2801 break;
2803 default:
2804 gcc_unreachable ();
2807 cost += cost0;
2808 cost += cost1;
2810 /* Bound the cost by target_spill_cost. The parts of complicated
2811 computations often are either loop invariant or at least can
2812 be shared between several iv uses, so letting this grow without
2813 limits would not give reasonable results. */
2814 return cost < target_spill_cost ? cost : target_spill_cost;
2817 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2818 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2819 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2820 invariants the computation depends on. */
2822 static unsigned
2823 split_address_cost (struct ivopts_data *data,
2824 tree addr, bool *symbol_present, bool *var_present,
2825 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2827 tree core;
2828 HOST_WIDE_INT bitsize;
2829 HOST_WIDE_INT bitpos;
2830 tree toffset;
2831 enum machine_mode mode;
2832 int unsignedp, volatilep;
2834 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
2835 &unsignedp, &volatilep, false);
2837 if (toffset != 0
2838 || bitpos % BITS_PER_UNIT != 0
2839 || TREE_CODE (core) != VAR_DECL)
2841 *symbol_present = false;
2842 *var_present = true;
2843 fd_ivopts_data = data;
2844 walk_tree (&addr, find_depends, depends_on, NULL);
2845 return target_spill_cost;
2848 *offset += bitpos / BITS_PER_UNIT;
2849 if (TREE_STATIC (core)
2850 || DECL_EXTERNAL (core))
2852 *symbol_present = true;
2853 *var_present = false;
2854 return 0;
2857 *symbol_present = false;
2858 *var_present = true;
2859 return 0;
2862 /* Estimates cost of expressing difference of addresses E1 - E2 as
2863 var + symbol + offset. The value of offset is added to OFFSET,
2864 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2865 part is missing. DEPENDS_ON is a set of the invariants the computation
2866 depends on. */
2868 static unsigned
2869 ptr_difference_cost (struct ivopts_data *data,
2870 tree e1, tree e2, bool *symbol_present, bool *var_present,
2871 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2873 HOST_WIDE_INT diff = 0;
2874 unsigned cost;
2876 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
2878 if (ptr_difference_const (e1, e2, &diff))
2880 *offset += diff;
2881 *symbol_present = false;
2882 *var_present = false;
2883 return 0;
2886 if (e2 == integer_zero_node)
2887 return split_address_cost (data, TREE_OPERAND (e1, 0),
2888 symbol_present, var_present, offset, depends_on);
2890 *symbol_present = false;
2891 *var_present = true;
2893 cost = force_var_cost (data, e1, depends_on);
2894 cost += force_var_cost (data, e2, depends_on);
2895 cost += add_cost (Pmode);
2897 return cost;
2900 /* Estimates cost of expressing difference E1 - E2 as
2901 var + symbol + offset. The value of offset is added to OFFSET,
2902 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2903 part is missing. DEPENDS_ON is a set of the invariants the computation
2904 depends on. */
2906 static unsigned
2907 difference_cost (struct ivopts_data *data,
2908 tree e1, tree e2, bool *symbol_present, bool *var_present,
2909 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2911 unsigned cost;
2912 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
2914 strip_offset (&e1, offset);
2915 *offset = -*offset;
2916 strip_offset (&e2, offset);
2917 *offset = -*offset;
2919 if (TREE_CODE (e1) == ADDR_EXPR)
2920 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
2921 depends_on);
2922 *symbol_present = false;
2924 if (operand_equal_p (e1, e2, 0))
2926 *var_present = false;
2927 return 0;
2929 *var_present = true;
2930 if (zero_p (e2))
2931 return force_var_cost (data, e1, depends_on);
2933 if (zero_p (e1))
2935 cost = force_var_cost (data, e2, depends_on);
2936 cost += multiply_by_cost (-1, mode);
2938 return cost;
2941 cost = force_var_cost (data, e1, depends_on);
2942 cost += force_var_cost (data, e2, depends_on);
2943 cost += add_cost (mode);
2945 return cost;
2948 /* Determines the cost of the computation by that USE is expressed
2949 from induction variable CAND. If ADDRESS_P is true, we just need
2950 to create an address from it, otherwise we want to get it into
2951 register. A set of invariants we depend on is stored in
2952 DEPENDS_ON. AT is the statement at that the value is computed. */
2954 static unsigned
2955 get_computation_cost_at (struct ivopts_data *data,
2956 struct iv_use *use, struct iv_cand *cand,
2957 bool address_p, bitmap *depends_on, tree at)
2959 tree ubase = use->iv->base, ustep = use->iv->step;
2960 tree cbase, cstep;
2961 tree utype = TREE_TYPE (ubase), ctype;
2962 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
2963 HOST_WIDE_INT ratio, aratio;
2964 bool var_present, symbol_present;
2965 unsigned cost = 0, n_sums;
2967 *depends_on = NULL;
2969 /* Only consider real candidates. */
2970 if (!cand->iv)
2971 return INFTY;
2973 cbase = cand->iv->base;
2974 cstep = cand->iv->step;
2975 ctype = TREE_TYPE (cbase);
2977 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2979 /* We do not have a precision to express the values of use. */
2980 return INFTY;
2983 if (address_p)
2985 /* Do not try to express address of an object with computation based
2986 on address of a different object. This may cause problems in rtl
2987 level alias analysis (that does not expect this to be happening,
2988 as this is illegal in C), and would be unlikely to be useful
2989 anyway. */
2990 if (use->iv->base_object
2991 && cand->iv->base_object
2992 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
2993 return INFTY;
2996 if (!cst_and_fits_in_hwi (ustep)
2997 || !cst_and_fits_in_hwi (cstep))
2998 return INFTY;
3000 if (TREE_CODE (ubase) == INTEGER_CST
3001 && !cst_and_fits_in_hwi (ubase))
3002 goto fallback;
3004 if (TREE_CODE (cbase) == INTEGER_CST
3005 && !cst_and_fits_in_hwi (cbase))
3006 goto fallback;
3008 ustepi = int_cst_value (ustep);
3009 cstepi = int_cst_value (cstep);
3011 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3013 /* TODO -- add direct handling of this case. */
3014 goto fallback;
3017 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3018 return INFTY;
3020 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3021 or ratio == 1, it is better to handle this like
3023 ubase - ratio * cbase + ratio * var
3025 (also holds in the case ratio == -1, TODO. */
3027 if (TREE_CODE (cbase) == INTEGER_CST)
3029 offset = - ratio * int_cst_value (cbase);
3030 cost += difference_cost (data,
3031 ubase, integer_zero_node,
3032 &symbol_present, &var_present, &offset,
3033 depends_on);
3035 else if (ratio == 1)
3037 cost += difference_cost (data,
3038 ubase, cbase,
3039 &symbol_present, &var_present, &offset,
3040 depends_on);
3042 else
3044 cost += force_var_cost (data, cbase, depends_on);
3045 cost += add_cost (TYPE_MODE (ctype));
3046 cost += difference_cost (data,
3047 ubase, integer_zero_node,
3048 &symbol_present, &var_present, &offset,
3049 depends_on);
3052 /* If we are after the increment, the value of the candidate is higher by
3053 one iteration. */
3054 if (stmt_after_increment (data->current_loop, cand, at))
3055 offset -= ratio * cstepi;
3057 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3058 (symbol/var/const parts may be omitted). If we are looking for an address,
3059 find the cost of addressing this. */
3060 if (address_p)
3061 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3063 /* Otherwise estimate the costs for computing the expression. */
3064 aratio = ratio > 0 ? ratio : -ratio;
3065 if (!symbol_present && !var_present && !offset)
3067 if (ratio != 1)
3068 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3070 return cost;
3073 if (aratio != 1)
3074 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3076 n_sums = 1;
3077 if (var_present
3078 /* Symbol + offset should be compile-time computable. */
3079 && (symbol_present || offset))
3080 n_sums++;
3082 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3084 fallback:
3086 /* Just get the expression, expand it and measure the cost. */
3087 tree comp = get_computation_at (data->current_loop, use, cand, at);
3089 if (!comp)
3090 return INFTY;
3092 if (address_p)
3093 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3095 return computation_cost (comp);
3099 /* Determines the cost of the computation by that USE is expressed
3100 from induction variable CAND. If ADDRESS_P is true, we just need
3101 to create an address from it, otherwise we want to get it into
3102 register. A set of invariants we depend on is stored in
3103 DEPENDS_ON. */
3105 static unsigned
3106 get_computation_cost (struct ivopts_data *data,
3107 struct iv_use *use, struct iv_cand *cand,
3108 bool address_p, bitmap *depends_on)
3110 return get_computation_cost_at (data,
3111 use, cand, address_p, depends_on, use->stmt);
3114 /* Determines cost of basing replacement of USE on CAND in a generic
3115 expression. */
3117 static bool
3118 determine_use_iv_cost_generic (struct ivopts_data *data,
3119 struct iv_use *use, struct iv_cand *cand)
3121 bitmap depends_on;
3122 unsigned cost;
3124 /* The simple case first -- if we need to express value of the preserved
3125 original biv, the cost is 0. This also prevents us from counting the
3126 cost of increment twice -- once at this use and once in the cost of
3127 the candidate. */
3128 if (cand->pos == IP_ORIGINAL
3129 && cand->incremented_at == use->stmt)
3131 set_use_iv_cost (data, use, cand, 0, NULL);
3132 return true;
3135 cost = get_computation_cost (data, use, cand, false, &depends_on);
3136 set_use_iv_cost (data, use, cand, cost, depends_on);
3138 return cost != INFTY;
3141 /* Determines cost of basing replacement of USE on CAND in an address. */
3143 static bool
3144 determine_use_iv_cost_address (struct ivopts_data *data,
3145 struct iv_use *use, struct iv_cand *cand)
3147 bitmap depends_on;
3148 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3150 set_use_iv_cost (data, use, cand, cost, depends_on);
3152 return cost != INFTY;
3155 /* Computes value of induction variable IV in iteration NITER. */
3157 static tree
3158 iv_value (struct iv *iv, tree niter)
3160 tree val;
3161 tree type = TREE_TYPE (iv->base);
3163 niter = fold_convert (type, niter);
3164 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3166 return fold (build2 (PLUS_EXPR, type, iv->base, val));
3169 /* Computes value of candidate CAND at position AT in iteration NITER. */
3171 static tree
3172 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3174 tree val = iv_value (cand->iv, niter);
3175 tree type = TREE_TYPE (cand->iv->base);
3177 if (stmt_after_increment (loop, cand, at))
3178 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3180 return val;
3183 /* Check whether it is possible to express the condition in USE by comparison
3184 of candidate CAND. If so, store the comparison code to COMPARE and the
3185 value compared with to BOUND. */
3187 static bool
3188 may_eliminate_iv (struct loop *loop,
3189 struct iv_use *use, struct iv_cand *cand,
3190 enum tree_code *compare, tree *bound)
3192 basic_block ex_bb;
3193 edge exit;
3194 struct tree_niter_desc niter, new_niter;
3195 tree wider_type, type, base;
3197 /* For now works only for exits that dominate the loop latch. TODO -- extend
3198 for other conditions inside loop body. */
3199 ex_bb = bb_for_stmt (use->stmt);
3200 if (use->stmt != last_stmt (ex_bb)
3201 || TREE_CODE (use->stmt) != COND_EXPR)
3202 return false;
3203 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3204 return false;
3206 exit = EDGE_SUCC (ex_bb, 0);
3207 if (flow_bb_inside_loop_p (loop, exit->dest))
3208 exit = EDGE_SUCC (ex_bb, 1);
3209 if (flow_bb_inside_loop_p (loop, exit->dest))
3210 return false;
3212 niter.niter = NULL_TREE;
3213 number_of_iterations_exit (loop, exit, &niter);
3214 if (!niter.niter
3215 || !integer_nonzerop (niter.assumptions)
3216 || !integer_zerop (niter.may_be_zero))
3217 return false;
3219 if (exit->flags & EDGE_TRUE_VALUE)
3220 *compare = EQ_EXPR;
3221 else
3222 *compare = NE_EXPR;
3224 *bound = cand_value_at (loop, cand, use->stmt, niter.niter);
3226 /* Let us check there is not some problem with overflows, by checking that
3227 the number of iterations is unchanged. */
3228 base = cand->iv->base;
3229 type = TREE_TYPE (base);
3230 if (stmt_after_increment (loop, cand, use->stmt))
3231 base = fold (build2 (PLUS_EXPR, type, base, cand->iv->step));
3233 new_niter.niter = NULL_TREE;
3234 number_of_iterations_cond (TREE_TYPE (cand->iv->base), base,
3235 cand->iv->step, NE_EXPR, *bound, NULL_TREE,
3236 &new_niter);
3237 if (!new_niter.niter
3238 || !integer_nonzerop (new_niter.assumptions)
3239 || !integer_zerop (new_niter.may_be_zero))
3240 return false;
3242 wider_type = TREE_TYPE (new_niter.niter);
3243 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (TREE_TYPE (niter.niter)))
3244 wider_type = TREE_TYPE (niter.niter);
3245 if (!operand_equal_p (fold_convert (wider_type, niter.niter),
3246 fold_convert (wider_type, new_niter.niter), 0))
3247 return false;
3249 return true;
3252 /* Determines cost of basing replacement of USE on CAND in a condition. */
3254 static bool
3255 determine_use_iv_cost_condition (struct ivopts_data *data,
3256 struct iv_use *use, struct iv_cand *cand)
3258 tree bound;
3259 enum tree_code compare;
3261 /* Only consider real candidates. */
3262 if (!cand->iv)
3264 set_use_iv_cost (data, use, cand, INFTY, NULL);
3265 return false;
3268 if (may_eliminate_iv (data->current_loop, use, cand, &compare, &bound))
3270 bitmap depends_on = NULL;
3271 unsigned cost = force_var_cost (data, bound, &depends_on);
3273 set_use_iv_cost (data, use, cand, cost, depends_on);
3274 return cost != INFTY;
3277 /* The induction variable elimination failed; just express the original
3278 giv. If it is compared with an invariant, note that we cannot get
3279 rid of it. */
3280 if (TREE_CODE (*use->op_p) == SSA_NAME)
3281 record_invariant (data, *use->op_p, true);
3282 else
3284 record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
3285 record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
3288 return determine_use_iv_cost_generic (data, use, cand);
3291 /* Checks whether it is possible to replace the final value of USE by
3292 a direct computation. If so, the formula is stored to *VALUE. */
3294 static bool
3295 may_replace_final_value (struct loop *loop, struct iv_use *use, tree *value)
3297 edge exit;
3298 struct tree_niter_desc *niter;
3300 exit = single_dom_exit (loop);
3301 if (!exit)
3302 return false;
3304 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3305 bb_for_stmt (use->stmt)));
3307 niter = &loop_data (loop)->niter;
3308 if (!niter->niter
3309 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
3310 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
3311 return false;
3313 *value = iv_value (use->iv, niter->niter);
3315 return true;
3318 /* Determines cost of replacing final value of USE using CAND. */
3320 static bool
3321 determine_use_iv_cost_outer (struct ivopts_data *data,
3322 struct iv_use *use, struct iv_cand *cand)
3324 bitmap depends_on;
3325 unsigned cost;
3326 edge exit;
3327 tree value;
3328 struct loop *loop = data->current_loop;
3330 /* The simple case first -- if we need to express value of the preserved
3331 original biv, the cost is 0. This also prevents us from counting the
3332 cost of increment twice -- once at this use and once in the cost of
3333 the candidate. */
3334 if (cand->pos == IP_ORIGINAL
3335 && cand->incremented_at == use->stmt)
3337 set_use_iv_cost (data, use, cand, 0, NULL);
3338 return true;
3341 if (!cand->iv)
3343 if (!may_replace_final_value (loop, use, &value))
3345 set_use_iv_cost (data, use, cand, INFTY, NULL);
3346 return false;
3349 depends_on = NULL;
3350 cost = force_var_cost (data, value, &depends_on);
3352 cost /= AVG_LOOP_NITER (loop);
3354 set_use_iv_cost (data, use, cand, cost, depends_on);
3355 return cost != INFTY;
3358 exit = single_dom_exit (loop);
3359 if (exit)
3361 /* If there is just a single exit, we may use value of the candidate
3362 after we take it to determine the value of use. */
3363 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3364 last_stmt (exit->src));
3365 if (cost != INFTY)
3366 cost /= AVG_LOOP_NITER (loop);
3368 else
3370 /* Otherwise we just need to compute the iv. */
3371 cost = get_computation_cost (data, use, cand, false, &depends_on);
3374 set_use_iv_cost (data, use, cand, cost, depends_on);
3376 return cost != INFTY;
3379 /* Determines cost of basing replacement of USE on CAND. Returns false
3380 if USE cannot be based on CAND. */
3382 static bool
3383 determine_use_iv_cost (struct ivopts_data *data,
3384 struct iv_use *use, struct iv_cand *cand)
3386 switch (use->type)
3388 case USE_NONLINEAR_EXPR:
3389 return determine_use_iv_cost_generic (data, use, cand);
3391 case USE_OUTER:
3392 return determine_use_iv_cost_outer (data, use, cand);
3394 case USE_ADDRESS:
3395 return determine_use_iv_cost_address (data, use, cand);
3397 case USE_COMPARE:
3398 return determine_use_iv_cost_condition (data, use, cand);
3400 default:
3401 gcc_unreachable ();
3405 /* Determines costs of basing the use of the iv on an iv candidate. */
3407 static void
3408 determine_use_iv_costs (struct ivopts_data *data)
3410 unsigned i, j;
3411 struct iv_use *use;
3412 struct iv_cand *cand;
3413 bitmap to_clear = BITMAP_XMALLOC ();
3415 alloc_use_cost_map (data);
3417 for (i = 0; i < n_iv_uses (data); i++)
3419 use = iv_use (data, i);
3421 if (data->consider_all_candidates)
3423 for (j = 0; j < n_iv_cands (data); j++)
3425 cand = iv_cand (data, j);
3426 determine_use_iv_cost (data, use, cand);
3429 else
3431 bitmap_iterator bi;
3433 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3435 cand = iv_cand (data, j);
3436 if (!determine_use_iv_cost (data, use, cand))
3437 bitmap_set_bit (to_clear, j);
3440 /* Remove the candidates for that the cost is infinite from
3441 the list of related candidates. */
3442 bitmap_and_compl_into (use->related_cands, to_clear);
3443 bitmap_clear (to_clear);
3447 BITMAP_XFREE (to_clear);
3449 if (dump_file && (dump_flags & TDF_DETAILS))
3451 fprintf (dump_file, "Use-candidate costs:\n");
3453 for (i = 0; i < n_iv_uses (data); i++)
3455 use = iv_use (data, i);
3457 fprintf (dump_file, "Use %d:\n", i);
3458 fprintf (dump_file, " cand\tcost\tdepends on\n");
3459 for (j = 0; j < use->n_map_members; j++)
3461 if (!use->cost_map[j].cand
3462 || use->cost_map[j].cost == INFTY)
3463 continue;
3465 fprintf (dump_file, " %d\t%d\t",
3466 use->cost_map[j].cand->id,
3467 use->cost_map[j].cost);
3468 if (use->cost_map[j].depends_on)
3469 bitmap_print (dump_file,
3470 use->cost_map[j].depends_on, "","");
3471 fprintf (dump_file, "\n");
3474 fprintf (dump_file, "\n");
3476 fprintf (dump_file, "\n");
3480 /* Determines cost of the candidate CAND. */
3482 static void
3483 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3485 unsigned cost_base, cost_step;
3486 tree base, last;
3487 basic_block bb;
3489 if (!cand->iv)
3491 cand->cost = 0;
3492 return;
3495 /* There are two costs associated with the candidate -- its increment
3496 and its initialization. The second is almost negligible for any loop
3497 that rolls enough, so we take it just very little into account. */
3499 base = cand->iv->base;
3500 cost_base = force_var_cost (data, base, NULL);
3501 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3503 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3505 /* Prefer the original iv unless we may gain something by replacing it. */
3506 if (cand->pos == IP_ORIGINAL)
3507 cand->cost--;
3509 /* Prefer not to insert statements into latch unless there are some
3510 already (so that we do not create unnecessary jumps). */
3511 if (cand->pos == IP_END)
3513 bb = ip_end_pos (data->current_loop);
3514 last = last_stmt (bb);
3516 if (!last
3517 || TREE_CODE (last) == LABEL_EXPR)
3518 cand->cost++;
3522 /* Determines costs of computation of the candidates. */
3524 static void
3525 determine_iv_costs (struct ivopts_data *data)
3527 unsigned i;
3529 if (dump_file && (dump_flags & TDF_DETAILS))
3531 fprintf (dump_file, "Candidate costs:\n");
3532 fprintf (dump_file, " cand\tcost\n");
3535 for (i = 0; i < n_iv_cands (data); i++)
3537 struct iv_cand *cand = iv_cand (data, i);
3539 determine_iv_cost (data, cand);
3541 if (dump_file && (dump_flags & TDF_DETAILS))
3542 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3545 if (dump_file && (dump_flags & TDF_DETAILS))
3546 fprintf (dump_file, "\n");
3549 /* Calculates cost for having SIZE induction variables. */
3551 static unsigned
3552 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3554 return global_cost_for_size (size,
3555 loop_data (data->current_loop)->regs_used,
3556 n_iv_uses (data));
3559 /* For each size of the induction variable set determine the penalty. */
3561 static void
3562 determine_set_costs (struct ivopts_data *data)
3564 unsigned j, n;
3565 tree phi, op;
3566 struct loop *loop = data->current_loop;
3567 bitmap_iterator bi;
3569 /* We use the following model (definitely improvable, especially the
3570 cost function -- TODO):
3572 We estimate the number of registers available (using MD data), name it A.
3574 We estimate the number of registers used by the loop, name it U. This
3575 number is obtained as the number of loop phi nodes (not counting virtual
3576 registers and bivs) + the number of variables from outside of the loop.
3578 We set a reserve R (free regs that are used for temporary computations,
3579 etc.). For now the reserve is a constant 3.
3581 Let I be the number of induction variables.
3583 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3584 make a lot of ivs without a reason).
3585 -- if A - R < U + I <= A, the cost is I * PRES_COST
3586 -- if U + I > A, the cost is I * PRES_COST and
3587 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3589 if (dump_file && (dump_flags & TDF_DETAILS))
3591 fprintf (dump_file, "Global costs:\n");
3592 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3593 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3594 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3595 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3598 n = 0;
3599 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3601 op = PHI_RESULT (phi);
3603 if (!is_gimple_reg (op))
3604 continue;
3606 if (get_iv (data, op))
3607 continue;
3609 n++;
3612 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3614 struct version_info *info = ver_info (data, j);
3616 if (info->inv_id && info->has_nonlin_use)
3617 n++;
3620 loop_data (loop)->regs_used = n;
3621 if (dump_file && (dump_flags & TDF_DETAILS))
3622 fprintf (dump_file, " regs_used %d\n", n);
3624 if (dump_file && (dump_flags & TDF_DETAILS))
3626 fprintf (dump_file, " cost for size:\n");
3627 fprintf (dump_file, " ivs\tcost\n");
3628 for (j = 0; j <= 2 * target_avail_regs; j++)
3629 fprintf (dump_file, " %d\t%d\n", j,
3630 ivopts_global_cost_for_size (data, j));
3631 fprintf (dump_file, "\n");
3635 /* Returns true if A is a cheaper cost pair than B. */
3637 static bool
3638 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3640 if (!a)
3641 return false;
3643 if (!b)
3644 return true;
3646 if (a->cost < b->cost)
3647 return true;
3649 if (a->cost > b->cost)
3650 return false;
3652 /* In case the costs are the same, prefer the cheaper candidate. */
3653 if (a->cand->cost < b->cand->cost)
3654 return true;
3656 return false;
3659 /* Computes the cost field of IVS structure. */
3661 static void
3662 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
3664 unsigned cost = 0;
3666 cost += ivs->cand_use_cost;
3667 cost += ivs->cand_cost;
3668 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
3670 ivs->cost = cost;
3673 /* Set USE not to be expressed by any candidate in IVS. */
3675 static void
3676 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
3677 struct iv_use *use)
3679 unsigned uid = use->id, cid, iid;
3680 bitmap deps;
3681 struct cost_pair *cp;
3682 bitmap_iterator bi;
3684 cp = ivs->cand_for_use[uid];
3685 if (!cp)
3686 return;
3687 cid = cp->cand->id;
3689 ivs->bad_uses++;
3690 ivs->cand_for_use[uid] = NULL;
3691 ivs->n_cand_uses[cid]--;
3693 if (ivs->n_cand_uses[cid] == 0)
3695 bitmap_clear_bit (ivs->cands, cid);
3696 /* Do not count the pseudocandidates. */
3697 if (cp->cand->iv)
3698 ivs->n_regs--;
3699 ivs->n_cands--;
3700 ivs->cand_cost -= cp->cand->cost;
3703 ivs->cand_use_cost -= cp->cost;
3705 deps = cp->depends_on;
3707 if (deps)
3709 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3711 ivs->n_invariant_uses[iid]--;
3712 if (ivs->n_invariant_uses[iid] == 0)
3713 ivs->n_regs--;
3717 iv_ca_recount_cost (data, ivs);
3720 /* Set cost pair for USE in set IVS to CP. */
3722 static void
3723 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
3724 struct iv_use *use, struct cost_pair *cp)
3726 unsigned uid = use->id, cid, iid;
3727 bitmap deps;
3728 bitmap_iterator bi;
3730 if (ivs->cand_for_use[uid] == cp)
3731 return;
3733 if (ivs->cand_for_use[uid])
3734 iv_ca_set_no_cp (data, ivs, use);
3736 if (cp)
3738 cid = cp->cand->id;
3740 ivs->bad_uses--;
3741 ivs->cand_for_use[uid] = cp;
3742 ivs->n_cand_uses[cid]++;
3743 if (ivs->n_cand_uses[cid] == 1)
3745 bitmap_set_bit (ivs->cands, cid);
3746 /* Do not count the pseudocandidates. */
3747 if (cp->cand->iv)
3748 ivs->n_regs++;
3749 ivs->n_cands++;
3750 ivs->cand_cost += cp->cand->cost;
3753 ivs->cand_use_cost += cp->cost;
3755 deps = cp->depends_on;
3757 if (deps)
3759 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3761 ivs->n_invariant_uses[iid]++;
3762 if (ivs->n_invariant_uses[iid] == 1)
3763 ivs->n_regs++;
3767 iv_ca_recount_cost (data, ivs);
3771 /* Extend set IVS by expressing USE by some of the candidates in it
3772 if possible. */
3774 static void
3775 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
3776 struct iv_use *use)
3778 struct cost_pair *best_cp = NULL, *cp;
3779 bitmap_iterator bi;
3780 unsigned i;
3782 gcc_assert (ivs->upto >= use->id);
3784 if (ivs->upto == use->id)
3786 ivs->upto++;
3787 ivs->bad_uses++;
3790 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
3792 cp = get_use_iv_cost (data, use, iv_cand (data, i));
3794 if (cheaper_cost_pair (cp, best_cp))
3795 best_cp = cp;
3798 iv_ca_set_cp (data, ivs, use, best_cp);
3801 /* Get cost for assignment IVS. */
3803 static unsigned
3804 iv_ca_cost (struct iv_ca *ivs)
3806 return (ivs->bad_uses ? INFTY : ivs->cost);
3809 /* Returns true if all dependences of CP are among invariants in IVS. */
3811 static bool
3812 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
3814 unsigned i;
3815 bitmap_iterator bi;
3817 if (!cp->depends_on)
3818 return true;
3820 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
3822 if (ivs->n_invariant_uses[i] == 0)
3823 return false;
3826 return true;
3829 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
3830 it before NEXT_CHANGE. */
3832 static struct iv_ca_delta *
3833 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
3834 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
3836 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
3838 change->use = use;
3839 change->old_cp = old_cp;
3840 change->new_cp = new_cp;
3841 change->next_change = next_change;
3843 return change;
3846 /* Joins two lists of changes L1 and L2. Destructive -- old lists
3847 are rewritten. */
3849 static struct iv_ca_delta *
3850 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
3852 struct iv_ca_delta *last;
3854 if (!l2)
3855 return l1;
3857 if (!l1)
3858 return l2;
3860 for (last = l1; last->next_change; last = last->next_change)
3861 continue;
3862 last->next_change = l2;
3864 return l1;
3867 /* Returns candidate by that USE is expressed in IVS. */
3869 static struct cost_pair *
3870 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
3872 return ivs->cand_for_use[use->id];
3875 /* Reverse the list of changes DELTA, forming the inverse to it. */
3877 static struct iv_ca_delta *
3878 iv_ca_delta_reverse (struct iv_ca_delta *delta)
3880 struct iv_ca_delta *act, *next, *prev = NULL;
3881 struct cost_pair *tmp;
3883 for (act = delta; act; act = next)
3885 next = act->next_change;
3886 act->next_change = prev;
3887 prev = act;
3889 tmp = act->old_cp;
3890 act->old_cp = act->new_cp;
3891 act->new_cp = tmp;
3894 return prev;
3897 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
3898 reverted instead. */
3900 static void
3901 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
3902 struct iv_ca_delta *delta, bool forward)
3904 struct cost_pair *from, *to;
3905 struct iv_ca_delta *act;
3907 if (!forward)
3908 delta = iv_ca_delta_reverse (delta);
3910 for (act = delta; act; act = act->next_change)
3912 from = act->old_cp;
3913 to = act->new_cp;
3914 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
3915 iv_ca_set_cp (data, ivs, act->use, to);
3918 if (!forward)
3919 iv_ca_delta_reverse (delta);
3922 /* Returns true if CAND is used in IVS. */
3924 static bool
3925 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
3927 return ivs->n_cand_uses[cand->id] > 0;
3930 /* Returns number of induction variable candidates in the set IVS. */
3932 static unsigned
3933 iv_ca_n_cands (struct iv_ca *ivs)
3935 return ivs->n_cands;
3938 /* Free the list of changes DELTA. */
3940 static void
3941 iv_ca_delta_free (struct iv_ca_delta **delta)
3943 struct iv_ca_delta *act, *next;
3945 for (act = *delta; act; act = next)
3947 next = act->next_change;
3948 free (act);
3951 *delta = NULL;
3954 /* Allocates new iv candidates assignment. */
3956 static struct iv_ca *
3957 iv_ca_new (struct ivopts_data *data)
3959 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
3961 nw->upto = 0;
3962 nw->bad_uses = 0;
3963 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
3964 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
3965 nw->cands = BITMAP_XMALLOC ();
3966 nw->n_cands = 0;
3967 nw->n_regs = 0;
3968 nw->cand_use_cost = 0;
3969 nw->cand_cost = 0;
3970 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
3971 nw->cost = 0;
3973 return nw;
3976 /* Free memory occupied by the set IVS. */
3978 static void
3979 iv_ca_free (struct iv_ca **ivs)
3981 free ((*ivs)->cand_for_use);
3982 free ((*ivs)->n_cand_uses);
3983 BITMAP_XFREE ((*ivs)->cands);
3984 free ((*ivs)->n_invariant_uses);
3985 free (*ivs);
3986 *ivs = NULL;
3989 /* Dumps IVS to FILE. */
3991 static void
3992 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
3994 const char *pref = " invariants ";
3995 unsigned i;
3997 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
3998 bitmap_print (file, ivs->cands, " candidates ","\n");
4000 for (i = 1; i <= data->max_inv_id; i++)
4001 if (ivs->n_invariant_uses[i])
4003 fprintf (file, "%s%d", pref, i);
4004 pref = ", ";
4006 fprintf (file, "\n");
4009 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4010 new set, and store differences in DELTA. Number of induction variables
4011 in the new set is stored to N_IVS. */
4013 static unsigned
4014 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4015 struct iv_cand *cand, struct iv_ca_delta **delta,
4016 unsigned *n_ivs)
4018 unsigned i, cost;
4019 struct iv_use *use;
4020 struct cost_pair *old_cp, *new_cp;
4022 *delta = NULL;
4023 for (i = 0; i < ivs->upto; i++)
4025 use = iv_use (data, i);
4026 old_cp = iv_ca_cand_for_use (ivs, use);
4028 if (old_cp
4029 && old_cp->cand == cand)
4030 continue;
4032 new_cp = get_use_iv_cost (data, use, cand);
4033 if (!new_cp)
4034 continue;
4036 if (!iv_ca_has_deps (ivs, new_cp))
4037 continue;
4039 if (!cheaper_cost_pair (new_cp, old_cp))
4040 continue;
4042 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4045 iv_ca_delta_commit (data, ivs, *delta, true);
4046 cost = iv_ca_cost (ivs);
4047 if (n_ivs)
4048 *n_ivs = iv_ca_n_cands (ivs);
4049 iv_ca_delta_commit (data, ivs, *delta, false);
4051 return cost;
4054 /* Try narrowing set IVS by removing CAND. Return the cost of
4055 the new set and store the differences in DELTA. */
4057 static unsigned
4058 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4059 struct iv_cand *cand, struct iv_ca_delta **delta)
4061 unsigned i, ci;
4062 struct iv_use *use;
4063 struct cost_pair *old_cp, *new_cp, *cp;
4064 bitmap_iterator bi;
4065 struct iv_cand *cnd;
4066 unsigned cost;
4068 *delta = NULL;
4069 for (i = 0; i < n_iv_uses (data); i++)
4071 use = iv_use (data, i);
4073 old_cp = iv_ca_cand_for_use (ivs, use);
4074 if (old_cp->cand != cand)
4075 continue;
4077 new_cp = NULL;
4079 if (data->consider_all_candidates)
4081 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4083 if (ci == cand->id)
4084 continue;
4086 cnd = iv_cand (data, ci);
4088 cp = get_use_iv_cost (data, use, cnd);
4089 if (!cp)
4090 continue;
4091 if (!iv_ca_has_deps (ivs, cp))
4092 continue;
4094 if (!cheaper_cost_pair (cp, new_cp))
4095 continue;
4097 new_cp = cp;
4100 else
4102 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4104 if (ci == cand->id)
4105 continue;
4107 cnd = iv_cand (data, ci);
4109 cp = get_use_iv_cost (data, use, cnd);
4110 if (!cp)
4111 continue;
4112 if (!iv_ca_has_deps (ivs, cp))
4113 continue;
4115 if (!cheaper_cost_pair (cp, new_cp))
4116 continue;
4118 new_cp = cp;
4122 if (!new_cp)
4124 iv_ca_delta_free (delta);
4125 return INFTY;
4128 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4131 iv_ca_delta_commit (data, ivs, *delta, true);
4132 cost = iv_ca_cost (ivs);
4133 iv_ca_delta_commit (data, ivs, *delta, false);
4135 return cost;
4138 /* Try optimizing the set of candidates IVS by removing candidates different
4139 from to EXCEPT_CAND from it. Return cost of the new set, and store
4140 differences in DELTA. */
4142 static unsigned
4143 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4144 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4146 bitmap_iterator bi;
4147 struct iv_ca_delta *act_delta, *best_delta;
4148 unsigned i, best_cost, acost;
4149 struct iv_cand *cand;
4151 best_delta = NULL;
4152 best_cost = iv_ca_cost (ivs);
4154 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4156 cand = iv_cand (data, i);
4158 if (cand == except_cand)
4159 continue;
4161 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4163 if (acost < best_cost)
4165 best_cost = acost;
4166 iv_ca_delta_free (&best_delta);
4167 best_delta = act_delta;
4169 else
4170 iv_ca_delta_free (&act_delta);
4173 if (!best_delta)
4175 *delta = NULL;
4176 return best_cost;
4179 /* Recurse to possibly remove other unnecessary ivs. */
4180 iv_ca_delta_commit (data, ivs, best_delta, true);
4181 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4182 iv_ca_delta_commit (data, ivs, best_delta, false);
4183 *delta = iv_ca_delta_join (best_delta, *delta);
4184 return best_cost;
4187 /* Tries to extend the sets IVS in the best possible way in order
4188 to express the USE. */
4190 static bool
4191 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4192 struct iv_use *use)
4194 unsigned best_cost, act_cost;
4195 unsigned i;
4196 bitmap_iterator bi;
4197 struct iv_cand *cand;
4198 struct iv_ca_delta *best_delta = NULL, *act_delta;
4199 struct cost_pair *cp;
4201 iv_ca_add_use (data, ivs, use);
4202 best_cost = iv_ca_cost (ivs);
4204 cp = iv_ca_cand_for_use (ivs, use);
4205 if (cp)
4207 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4208 iv_ca_set_no_cp (data, ivs, use);
4211 /* First try important candidates. Only if it fails, try the specific ones.
4212 Rationale -- in loops with many variables the best choice often is to use
4213 just one generic biv. If we added here many ivs specific to the uses,
4214 the optimization algorithm later would be likely to get stuck in a local
4215 minimum, thus causing us to create too many ivs. The approach from
4216 few ivs to more seems more likely to be successful -- starting from few
4217 ivs, replacing an expensive use by a specific iv should always be a
4218 win. */
4219 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4221 cand = iv_cand (data, i);
4223 if (iv_ca_cand_used_p (ivs, cand))
4224 continue;
4226 cp = get_use_iv_cost (data, use, cand);
4227 if (!cp)
4228 continue;
4230 iv_ca_set_cp (data, ivs, use, cp);
4231 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4232 iv_ca_set_no_cp (data, ivs, use);
4233 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4235 if (act_cost < best_cost)
4237 best_cost = act_cost;
4239 iv_ca_delta_free (&best_delta);
4240 best_delta = act_delta;
4242 else
4243 iv_ca_delta_free (&act_delta);
4246 if (best_cost == INFTY)
4248 for (i = 0; i < use->n_map_members; i++)
4250 cp = use->cost_map + i;
4251 cand = cp->cand;
4252 if (!cand)
4253 continue;
4255 /* Already tried this. */
4256 if (cand->important)
4257 continue;
4259 if (iv_ca_cand_used_p (ivs, cand))
4260 continue;
4262 act_delta = NULL;
4263 iv_ca_set_cp (data, ivs, use, cp);
4264 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4265 iv_ca_set_no_cp (data, ivs, use);
4266 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4267 cp, act_delta);
4269 if (act_cost < best_cost)
4271 best_cost = act_cost;
4273 if (best_delta)
4274 iv_ca_delta_free (&best_delta);
4275 best_delta = act_delta;
4277 else
4278 iv_ca_delta_free (&act_delta);
4282 iv_ca_delta_commit (data, ivs, best_delta, true);
4283 iv_ca_delta_free (&best_delta);
4285 return (best_cost != INFTY);
4288 /* Finds an initial assignment of candidates to uses. */
4290 static struct iv_ca *
4291 get_initial_solution (struct ivopts_data *data)
4293 struct iv_ca *ivs = iv_ca_new (data);
4294 unsigned i;
4296 for (i = 0; i < n_iv_uses (data); i++)
4297 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4299 iv_ca_free (&ivs);
4300 return NULL;
4303 return ivs;
4306 /* Tries to improve set of induction variables IVS. */
4308 static bool
4309 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4311 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4312 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4313 struct iv_cand *cand;
4315 /* Try extending the set of induction variables by one. */
4316 for (i = 0; i < n_iv_cands (data); i++)
4318 cand = iv_cand (data, i);
4320 if (iv_ca_cand_used_p (ivs, cand))
4321 continue;
4323 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4324 if (!act_delta)
4325 continue;
4327 /* If we successfully added the candidate and the set is small enough,
4328 try optimizing it by removing other candidates. */
4329 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4331 iv_ca_delta_commit (data, ivs, act_delta, true);
4332 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4333 iv_ca_delta_commit (data, ivs, act_delta, false);
4334 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4337 if (acost < best_cost)
4339 best_cost = acost;
4340 iv_ca_delta_free (&best_delta);
4341 best_delta = act_delta;
4343 else
4344 iv_ca_delta_free (&act_delta);
4347 if (!best_delta)
4349 /* Try removing the candidates from the set instead. */
4350 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4352 /* Nothing more we can do. */
4353 if (!best_delta)
4354 return false;
4357 iv_ca_delta_commit (data, ivs, best_delta, true);
4358 gcc_assert (best_cost == iv_ca_cost (ivs));
4359 iv_ca_delta_free (&best_delta);
4360 return true;
4363 /* Attempts to find the optimal set of induction variables. We do simple
4364 greedy heuristic -- we try to replace at most one candidate in the selected
4365 solution and remove the unused ivs while this improves the cost. */
4367 static struct iv_ca *
4368 find_optimal_iv_set (struct ivopts_data *data)
4370 unsigned i;
4371 struct iv_ca *set;
4372 struct iv_use *use;
4374 /* Get the initial solution. */
4375 set = get_initial_solution (data);
4376 if (!set)
4378 if (dump_file && (dump_flags & TDF_DETAILS))
4379 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4380 return NULL;
4383 if (dump_file && (dump_flags & TDF_DETAILS))
4385 fprintf (dump_file, "Initial set of candidates:\n");
4386 iv_ca_dump (data, dump_file, set);
4389 while (try_improve_iv_set (data, set))
4391 if (dump_file && (dump_flags & TDF_DETAILS))
4393 fprintf (dump_file, "Improved to:\n");
4394 iv_ca_dump (data, dump_file, set);
4398 if (dump_file && (dump_flags & TDF_DETAILS))
4399 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
4401 for (i = 0; i < n_iv_uses (data); i++)
4403 use = iv_use (data, i);
4404 use->selected = iv_ca_cand_for_use (set, use)->cand;
4407 return set;
4410 /* Creates a new induction variable corresponding to CAND. */
4412 static void
4413 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4415 block_stmt_iterator incr_pos;
4416 tree base;
4417 bool after = false;
4419 if (!cand->iv)
4420 return;
4422 switch (cand->pos)
4424 case IP_NORMAL:
4425 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4426 break;
4428 case IP_END:
4429 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4430 after = true;
4431 break;
4433 case IP_ORIGINAL:
4434 /* Mark that the iv is preserved. */
4435 name_info (data, cand->var_before)->preserve_biv = true;
4436 name_info (data, cand->var_after)->preserve_biv = true;
4438 /* Rewrite the increment so that it uses var_before directly. */
4439 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4441 return;
4444 gimple_add_tmp_var (cand->var_before);
4445 add_referenced_tmp_var (cand->var_before);
4447 base = unshare_expr (cand->iv->base);
4449 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
4450 &incr_pos, after, &cand->var_before, &cand->var_after);
4453 /* Creates new induction variables described in SET. */
4455 static void
4456 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4458 unsigned i;
4459 struct iv_cand *cand;
4460 bitmap_iterator bi;
4462 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4464 cand = iv_cand (data, i);
4465 create_new_iv (data, cand);
4469 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4470 is true, remove also the ssa name defined by the statement. */
4472 static void
4473 remove_statement (tree stmt, bool including_defined_name)
4475 if (TREE_CODE (stmt) == PHI_NODE)
4477 if (!including_defined_name)
4479 /* Prevent the ssa name defined by the statement from being removed. */
4480 SET_PHI_RESULT (stmt, NULL);
4482 remove_phi_node (stmt, NULL_TREE, bb_for_stmt (stmt));
4484 else
4486 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4488 bsi_remove (&bsi);
4492 /* Rewrites USE (definition of iv used in a nonlinear expression)
4493 using candidate CAND. */
4495 static void
4496 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4497 struct iv_use *use, struct iv_cand *cand)
4499 tree comp = unshare_expr (get_computation (data->current_loop,
4500 use, cand));
4501 tree op, stmts, tgt, ass;
4502 block_stmt_iterator bsi, pbsi;
4504 switch (TREE_CODE (use->stmt))
4506 case PHI_NODE:
4507 tgt = PHI_RESULT (use->stmt);
4509 /* If we should keep the biv, do not replace it. */
4510 if (name_info (data, tgt)->preserve_biv)
4511 return;
4513 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4514 while (!bsi_end_p (pbsi)
4515 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4517 bsi = pbsi;
4518 bsi_next (&pbsi);
4520 break;
4522 case MODIFY_EXPR:
4523 tgt = TREE_OPERAND (use->stmt, 0);
4524 bsi = bsi_for_stmt (use->stmt);
4525 break;
4527 default:
4528 gcc_unreachable ();
4531 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4533 if (TREE_CODE (use->stmt) == PHI_NODE)
4535 if (stmts)
4536 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4537 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
4538 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4539 remove_statement (use->stmt, false);
4540 SSA_NAME_DEF_STMT (tgt) = ass;
4542 else
4544 if (stmts)
4545 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4546 TREE_OPERAND (use->stmt, 1) = op;
4550 /* Replaces ssa name in index IDX by its basic variable. Callback for
4551 for_each_index. */
4553 static bool
4554 idx_remove_ssa_names (tree base, tree *idx,
4555 void *data ATTRIBUTE_UNUSED)
4557 tree *op;
4559 if (TREE_CODE (*idx) == SSA_NAME)
4560 *idx = SSA_NAME_VAR (*idx);
4562 if (TREE_CODE (base) == ARRAY_REF)
4564 op = &TREE_OPERAND (base, 2);
4565 if (*op
4566 && TREE_CODE (*op) == SSA_NAME)
4567 *op = SSA_NAME_VAR (*op);
4568 op = &TREE_OPERAND (base, 3);
4569 if (*op
4570 && TREE_CODE (*op) == SSA_NAME)
4571 *op = SSA_NAME_VAR (*op);
4574 return true;
4577 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4579 static tree
4580 unshare_and_remove_ssa_names (tree ref)
4582 ref = unshare_expr (ref);
4583 for_each_index (&ref, idx_remove_ssa_names, NULL);
4585 return ref;
4588 /* Rewrites base of memory access OP with expression WITH in statement
4589 pointed to by BSI. */
4591 static void
4592 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
4594 tree bvar, var, new_var, new_name, copy, name;
4595 tree orig;
4597 var = bvar = get_base_address (*op);
4599 if (!var || TREE_CODE (with) != SSA_NAME)
4600 goto do_rewrite;
4602 gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
4603 gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
4604 if (TREE_CODE (var) == INDIRECT_REF)
4605 var = TREE_OPERAND (var, 0);
4606 if (TREE_CODE (var) == SSA_NAME)
4608 name = var;
4609 var = SSA_NAME_VAR (var);
4611 else if (DECL_P (var))
4612 name = NULL_TREE;
4613 else
4614 goto do_rewrite;
4616 if (var_ann (var)->type_mem_tag)
4617 var = var_ann (var)->type_mem_tag;
4619 /* We need to add a memory tag for the variable. But we do not want
4620 to add it to the temporary used for the computations, since this leads
4621 to problems in redundancy elimination when there are common parts
4622 in two computations referring to the different arrays. So we copy
4623 the variable to a new temporary. */
4624 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
4625 if (name)
4626 new_name = duplicate_ssa_name (name, copy);
4627 else
4629 new_var = create_tmp_var (TREE_TYPE (with), "ruatmp");
4630 add_referenced_tmp_var (new_var);
4631 var_ann (new_var)->type_mem_tag = var;
4632 new_name = make_ssa_name (new_var, copy);
4634 TREE_OPERAND (copy, 0) = new_name;
4635 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
4636 with = new_name;
4638 do_rewrite:
4640 orig = NULL_TREE;
4641 gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
4642 gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
4644 if (TREE_CODE (*op) == INDIRECT_REF)
4645 orig = REF_ORIGINAL (*op);
4646 if (!orig)
4647 orig = unshare_and_remove_ssa_names (*op);
4649 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
4651 /* Record the original reference, for purposes of alias analysis. */
4652 REF_ORIGINAL (*op) = orig;
4655 /* Rewrites USE (address that is an iv) using candidate CAND. */
4657 static void
4658 rewrite_use_address (struct ivopts_data *data,
4659 struct iv_use *use, struct iv_cand *cand)
4661 tree comp = unshare_expr (get_computation (data->current_loop,
4662 use, cand));
4663 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4664 tree stmts;
4665 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
4667 if (stmts)
4668 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4670 rewrite_address_base (&bsi, use->op_p, op);
4673 /* Rewrites USE (the condition such that one of the arguments is an iv) using
4674 candidate CAND. */
4676 static void
4677 rewrite_use_compare (struct ivopts_data *data,
4678 struct iv_use *use, struct iv_cand *cand)
4680 tree comp;
4681 tree *op_p, cond, op, stmts, bound;
4682 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4683 enum tree_code compare;
4685 if (may_eliminate_iv (data->current_loop,
4686 use, cand, &compare, &bound))
4688 op = force_gimple_operand (unshare_expr (bound), &stmts,
4689 true, NULL_TREE);
4691 if (stmts)
4692 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4694 *use->op_p = build2 (compare, boolean_type_node,
4695 var_at_stmt (data->current_loop,
4696 cand, use->stmt), op);
4697 modify_stmt (use->stmt);
4698 return;
4701 /* The induction variable elimination failed; just express the original
4702 giv. */
4703 comp = unshare_expr (get_computation (data->current_loop, use, cand));
4705 cond = *use->op_p;
4706 op_p = &TREE_OPERAND (cond, 0);
4707 if (TREE_CODE (*op_p) != SSA_NAME
4708 || zero_p (get_iv (data, *op_p)->step))
4709 op_p = &TREE_OPERAND (cond, 1);
4711 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
4712 if (stmts)
4713 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4715 *op_p = op;
4718 /* Ensure that operand *OP_P may be used at the end of EXIT without
4719 violating loop closed ssa form. */
4721 static void
4722 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
4724 basic_block def_bb;
4725 struct loop *def_loop;
4726 tree phi, use;
4728 use = USE_FROM_PTR (op_p);
4729 if (TREE_CODE (use) != SSA_NAME)
4730 return;
4732 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
4733 if (!def_bb)
4734 return;
4736 def_loop = def_bb->loop_father;
4737 if (flow_bb_inside_loop_p (def_loop, exit->dest))
4738 return;
4740 /* Try finding a phi node that copies the value out of the loop. */
4741 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
4742 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
4743 break;
4745 if (!phi)
4747 /* Create such a phi node. */
4748 tree new_name = duplicate_ssa_name (use, NULL);
4750 phi = create_phi_node (new_name, exit->dest);
4751 SSA_NAME_DEF_STMT (new_name) = phi;
4752 add_phi_arg (phi, use, exit);
4755 SET_USE (op_p, PHI_RESULT (phi));
4758 /* Ensure that operands of STMT may be used at the end of EXIT without
4759 violating loop closed ssa form. */
4761 static void
4762 protect_loop_closed_ssa_form (edge exit, tree stmt)
4764 use_optype uses;
4765 vuse_optype vuses;
4766 v_may_def_optype v_may_defs;
4767 unsigned i;
4769 get_stmt_operands (stmt);
4771 uses = STMT_USE_OPS (stmt);
4772 for (i = 0; i < NUM_USES (uses); i++)
4773 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4775 vuses = STMT_VUSE_OPS (stmt);
4776 for (i = 0; i < NUM_VUSES (vuses); i++)
4777 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
4779 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4780 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
4781 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
4784 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
4785 so that they are emitted on the correct place, and so that the loop closed
4786 ssa form is preserved. */
4788 static void
4789 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
4791 tree_stmt_iterator tsi;
4792 block_stmt_iterator bsi;
4793 tree phi, stmt, def, next;
4795 if (EDGE_COUNT (exit->dest->preds) > 1)
4796 split_loop_exit_edge (exit);
4798 if (TREE_CODE (stmts) == STATEMENT_LIST)
4800 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
4801 protect_loop_closed_ssa_form (exit, tsi_stmt (tsi));
4803 else
4804 protect_loop_closed_ssa_form (exit, stmts);
4806 /* Ensure there is label in exit->dest, so that we can
4807 insert after it. */
4808 tree_block_label (exit->dest);
4809 bsi = bsi_after_labels (exit->dest);
4810 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4812 if (!op)
4813 return;
4815 for (phi = phi_nodes (exit->dest); phi; phi = next)
4817 next = PHI_CHAIN (phi);
4819 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
4821 def = PHI_RESULT (phi);
4822 remove_statement (phi, false);
4823 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
4824 def, op);
4825 SSA_NAME_DEF_STMT (def) = stmt;
4826 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
4831 /* Rewrites the final value of USE (that is only needed outside of the loop)
4832 using candidate CAND. */
4834 static void
4835 rewrite_use_outer (struct ivopts_data *data,
4836 struct iv_use *use, struct iv_cand *cand)
4838 edge exit;
4839 tree value, op, stmts, tgt;
4840 tree phi;
4842 switch (TREE_CODE (use->stmt))
4844 case PHI_NODE:
4845 tgt = PHI_RESULT (use->stmt);
4846 break;
4847 case MODIFY_EXPR:
4848 tgt = TREE_OPERAND (use->stmt, 0);
4849 break;
4850 default:
4851 gcc_unreachable ();
4854 exit = single_dom_exit (data->current_loop);
4856 if (exit)
4858 if (!cand->iv)
4860 bool ok = may_replace_final_value (data->current_loop, use, &value);
4861 gcc_assert (ok);
4863 else
4864 value = get_computation_at (data->current_loop,
4865 use, cand, last_stmt (exit->src));
4867 value = unshare_expr (value);
4868 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
4870 /* If we will preserve the iv anyway and we would need to perform
4871 some computation to replace the final value, do nothing. */
4872 if (stmts && name_info (data, tgt)->preserve_biv)
4873 return;
4875 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
4877 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
4879 if (USE_FROM_PTR (use_p) == tgt)
4880 SET_USE (use_p, op);
4883 if (stmts)
4884 compute_phi_arg_on_exit (exit, stmts, op);
4886 /* Enable removal of the statement. We cannot remove it directly,
4887 since we may still need the aliasing information attached to the
4888 ssa name defined by it. */
4889 name_info (data, tgt)->iv->have_use_for = false;
4890 return;
4893 /* If the variable is going to be preserved anyway, there is nothing to
4894 do. */
4895 if (name_info (data, tgt)->preserve_biv)
4896 return;
4898 /* Otherwise we just need to compute the iv. */
4899 rewrite_use_nonlinear_expr (data, use, cand);
4902 /* Rewrites USE using candidate CAND. */
4904 static void
4905 rewrite_use (struct ivopts_data *data,
4906 struct iv_use *use, struct iv_cand *cand)
4908 switch (use->type)
4910 case USE_NONLINEAR_EXPR:
4911 rewrite_use_nonlinear_expr (data, use, cand);
4912 break;
4914 case USE_OUTER:
4915 rewrite_use_outer (data, use, cand);
4916 break;
4918 case USE_ADDRESS:
4919 rewrite_use_address (data, use, cand);
4920 break;
4922 case USE_COMPARE:
4923 rewrite_use_compare (data, use, cand);
4924 break;
4926 default:
4927 gcc_unreachable ();
4929 modify_stmt (use->stmt);
4932 /* Rewrite the uses using the selected induction variables. */
4934 static void
4935 rewrite_uses (struct ivopts_data *data)
4937 unsigned i;
4938 struct iv_cand *cand;
4939 struct iv_use *use;
4941 for (i = 0; i < n_iv_uses (data); i++)
4943 use = iv_use (data, i);
4944 cand = use->selected;
4945 gcc_assert (cand);
4947 rewrite_use (data, use, cand);
4951 /* Removes the ivs that are not used after rewriting. */
4953 static void
4954 remove_unused_ivs (struct ivopts_data *data)
4956 unsigned j;
4957 bitmap_iterator bi;
4959 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4961 struct version_info *info;
4963 info = ver_info (data, j);
4964 if (info->iv
4965 && !zero_p (info->iv->step)
4966 && !info->inv_id
4967 && !info->iv->have_use_for
4968 && !info->preserve_biv)
4969 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
4973 /* Frees data allocated by the optimization of a single loop. */
4975 static void
4976 free_loop_data (struct ivopts_data *data)
4978 unsigned i, j;
4979 bitmap_iterator bi;
4981 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
4983 struct version_info *info;
4985 info = ver_info (data, i);
4986 if (info->iv)
4987 free (info->iv);
4988 info->iv = NULL;
4989 info->has_nonlin_use = false;
4990 info->preserve_biv = false;
4991 info->inv_id = 0;
4993 bitmap_clear (data->relevant);
4994 bitmap_clear (data->important_candidates);
4996 for (i = 0; i < n_iv_uses (data); i++)
4998 struct iv_use *use = iv_use (data, i);
5000 free (use->iv);
5001 BITMAP_XFREE (use->related_cands);
5002 for (j = 0; j < use->n_map_members; j++)
5003 if (use->cost_map[j].depends_on)
5004 BITMAP_XFREE (use->cost_map[j].depends_on);
5005 free (use->cost_map);
5006 free (use);
5008 VARRAY_POP_ALL (data->iv_uses);
5010 for (i = 0; i < n_iv_cands (data); i++)
5012 struct iv_cand *cand = iv_cand (data, i);
5014 if (cand->iv)
5015 free (cand->iv);
5016 free (cand);
5018 VARRAY_POP_ALL (data->iv_candidates);
5020 if (data->version_info_size < num_ssa_names)
5022 data->version_info_size = 2 * num_ssa_names;
5023 free (data->version_info);
5024 data->version_info = xcalloc (data->version_info_size,
5025 sizeof (struct version_info));
5028 data->max_inv_id = 0;
5030 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
5032 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
5034 SET_DECL_RTL (obj, NULL_RTX);
5036 VARRAY_POP_ALL (decl_rtl_to_reset);
5039 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5040 loop tree. */
5042 static void
5043 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5045 unsigned i;
5047 for (i = 1; i < loops->num; i++)
5048 if (loops->parray[i])
5050 free (loops->parray[i]->aux);
5051 loops->parray[i]->aux = NULL;
5054 free_loop_data (data);
5055 free (data->version_info);
5056 BITMAP_XFREE (data->relevant);
5057 BITMAP_XFREE (data->important_candidates);
5059 VARRAY_FREE (decl_rtl_to_reset);
5060 VARRAY_FREE (data->iv_uses);
5061 VARRAY_FREE (data->iv_candidates);
5064 /* Optimizes the LOOP. Returns true if anything changed. */
5066 static bool
5067 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5069 bool changed = false;
5070 struct iv_ca *iv_ca;
5071 edge exit;
5073 data->current_loop = loop;
5075 if (dump_file && (dump_flags & TDF_DETAILS))
5077 fprintf (dump_file, "Processing loop %d\n", loop->num);
5079 exit = single_dom_exit (loop);
5080 if (exit)
5082 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5083 exit->src->index, exit->dest->index);
5084 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5085 fprintf (dump_file, "\n");
5088 fprintf (dump_file, "\n");
5091 /* For each ssa name determines whether it behaves as an induction variable
5092 in some loop. */
5093 if (!find_induction_variables (data))
5094 goto finish;
5096 /* Finds interesting uses (item 1). */
5097 find_interesting_uses (data);
5098 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5099 goto finish;
5101 /* Finds candidates for the induction variables (item 2). */
5102 find_iv_candidates (data);
5104 /* Calculates the costs (item 3, part 1). */
5105 determine_use_iv_costs (data);
5106 determine_iv_costs (data);
5107 determine_set_costs (data);
5109 /* Find the optimal set of induction variables (item 3, part 2). */
5110 iv_ca = find_optimal_iv_set (data);
5111 if (!iv_ca)
5112 goto finish;
5113 changed = true;
5115 /* Create the new induction variables (item 4, part 1). */
5116 create_new_ivs (data, iv_ca);
5117 iv_ca_free (&iv_ca);
5119 /* Rewrite the uses (item 4, part 2). */
5120 rewrite_uses (data);
5122 /* Remove the ivs that are unused after rewriting. */
5123 remove_unused_ivs (data);
5125 loop_commit_inserts ();
5127 /* We have changed the structure of induction variables; it might happen
5128 that definitions in the scev database refer to some of them that were
5129 eliminated. */
5130 scev_reset ();
5132 finish:
5133 free_loop_data (data);
5135 return changed;
5138 /* Main entry point. Optimizes induction variables in LOOPS. */
5140 void
5141 tree_ssa_iv_optimize (struct loops *loops)
5143 struct loop *loop;
5144 struct ivopts_data data;
5146 tree_ssa_iv_optimize_init (loops, &data);
5148 /* Optimize the loops starting with the innermost ones. */
5149 loop = loops->tree_root;
5150 while (loop->inner)
5151 loop = loop->inner;
5153 #ifdef ENABLE_CHECKING
5154 verify_loop_closed_ssa ();
5155 verify_stmts ();
5156 #endif
5158 /* Scan the loops, inner ones first. */
5159 while (loop != loops->tree_root)
5161 if (dump_file && (dump_flags & TDF_DETAILS))
5162 flow_loop_dump (loop, dump_file, NULL, 1);
5164 tree_ssa_iv_optimize_loop (&data, loop);
5166 if (loop->next)
5168 loop = loop->next;
5169 while (loop->inner)
5170 loop = loop->inner;
5172 else
5173 loop = loop->outer;
5176 #ifdef ENABLE_CHECKING
5177 verify_loop_closed_ssa ();
5178 verify_stmts ();
5179 #endif
5181 tree_ssa_iv_optimize_finalize (loops, &data);