* gimplify.c (gimplify_modify_expr_rhs): Use types_compatible_p.
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blobd1a1bdd910aae164d663de614dcf3f184af462c0
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
92 /* The infinite cost. */
93 #define INFTY 10000000
95 /* The expected number of loop iterations. TODO -- use profiling instead of
96 this. */
97 #define AVG_LOOP_NITER(LOOP) 5
100 /* Representation of the induction variable. */
101 struct iv
103 tree base; /* Initial value of the iv. */
104 tree base_object; /* A memory object to that the induction variable points. */
105 tree step; /* Step of the iv (constant only). */
106 tree ssa_name; /* The ssa name with the value. */
107 bool biv_p; /* Is it a biv? */
108 bool have_use_for; /* Do we already have a use for it? */
109 unsigned use_id; /* The identifier in the use if it is the case. */
112 /* Per-ssa version information (induction variable descriptions, etc.). */
113 struct version_info
115 tree name; /* The ssa name. */
116 struct iv *iv; /* Induction variable description. */
117 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
118 an expression that is not an induction variable. */
119 unsigned inv_id; /* Id of an invariant. */
120 bool preserve_biv; /* For the original biv, whether to preserve it. */
123 /* Information attached to loop. */
124 struct loop_data
126 struct tree_niter_desc niter;
127 /* Number of iterations. */
129 unsigned regs_used; /* Number of registers used. */
132 /* Types of uses. */
133 enum use_type
135 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
136 USE_OUTER, /* The induction variable is used outside the loop. */
137 USE_ADDRESS, /* Use in an address. */
138 USE_COMPARE /* Use is a compare. */
141 /* The candidate - cost pair. */
142 struct cost_pair
144 struct iv_cand *cand; /* The candidate. */
145 unsigned cost; /* The cost. */
146 bitmap depends_on; /* The list of invariants that have to be
147 preserved. */
150 /* Use. */
151 struct iv_use
153 unsigned id; /* The id of the use. */
154 enum use_type type; /* Type of the use. */
155 struct iv *iv; /* The induction variable it is based on. */
156 tree stmt; /* Statement in that it occurs. */
157 tree *op_p; /* The place where it occurs. */
158 bitmap related_cands; /* The set of "related" iv candidates, plus the common
159 important ones. */
161 unsigned n_map_members; /* Number of candidates in the cost_map list. */
162 struct cost_pair *cost_map;
163 /* The costs wrto the iv candidates. */
165 struct iv_cand *selected;
166 /* The selected candidate. */
169 /* The position where the iv is computed. */
170 enum iv_position
172 IP_NORMAL, /* At the end, just before the exit condition. */
173 IP_END, /* At the end of the latch block. */
174 IP_ORIGINAL /* The original biv. */
177 /* The induction variable candidate. */
178 struct iv_cand
180 unsigned id; /* The number of the candidate. */
181 bool important; /* Whether this is an "important" candidate, i.e. such
182 that it should be considered by all uses. */
183 enum iv_position pos; /* Where it is computed. */
184 tree incremented_at; /* For original biv, the statement where it is
185 incremented. */
186 tree var_before; /* The variable used for it before increment. */
187 tree var_after; /* The variable used for it after increment. */
188 struct iv *iv; /* The value of the candidate. NULL for
189 "pseudocandidate" used to indicate the possibility
190 to replace the final value of an iv by direct
191 computation of the value. */
192 unsigned cost; /* Cost of the candidate. */
195 /* The data used by the induction variable optimizations. */
197 struct ivopts_data
199 /* The currently optimized loop. */
200 struct loop *current_loop;
202 /* The size of version_info array allocated. */
203 unsigned version_info_size;
205 /* The array of information for the ssa names. */
206 struct version_info *version_info;
208 /* The bitmap of indices in version_info whose value was changed. */
209 bitmap relevant;
211 /* The maximum invariant id. */
212 unsigned max_inv_id;
214 /* The uses of induction variables. */
215 varray_type iv_uses;
217 /* The candidates. */
218 varray_type iv_candidates;
220 /* A bitmap of important candidates. */
221 bitmap important_candidates;
223 /* Whether to consider just related and important candidates when replacing a
224 use. */
225 bool consider_all_candidates;
228 /* An assignment of iv candidates to uses. */
230 struct iv_ca
232 /* The number of uses covered by the assignment. */
233 unsigned upto;
235 /* Number of uses that cannot be expressed by the candidates in the set. */
236 unsigned bad_uses;
238 /* Candidate assigned to a use, together with the related costs. */
239 struct cost_pair **cand_for_use;
241 /* Number of times each candidate is used. */
242 unsigned *n_cand_uses;
244 /* The candidates used. */
245 bitmap cands;
247 /* The number of candidates in the set. */
248 unsigned n_cands;
250 /* Total number of registers needed. */
251 unsigned n_regs;
253 /* Total cost of expressing uses. */
254 unsigned cand_use_cost;
256 /* Total cost of candidates. */
257 unsigned cand_cost;
259 /* Number of times each invariant is used. */
260 unsigned *n_invariant_uses;
262 /* Total cost of the assignment. */
263 unsigned cost;
266 /* Difference of two iv candidate assignments. */
268 struct iv_ca_delta
270 /* Changed use. */
271 struct iv_use *use;
273 /* An old assignment (for rollback purposes). */
274 struct cost_pair *old_cp;
276 /* A new assignment. */
277 struct cost_pair *new_cp;
279 /* Next change in the list. */
280 struct iv_ca_delta *next_change;
283 /* Bound on number of candidates below that all candidates are considered. */
285 #define CONSIDER_ALL_CANDIDATES_BOUND \
286 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
288 /* If there are more iv occurrences, we just give up (it is quite unlikely that
289 optimizing such a loop would help, and it would take ages). */
291 #define MAX_CONSIDERED_USES \
292 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
294 /* If there are at most this number of ivs in the set, try removing unnecessary
295 ivs from the set always. */
297 #define ALWAYS_PRUNE_CAND_SET_BOUND \
298 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
300 /* The list of trees for that the decl_rtl field must be reset is stored
301 here. */
303 static varray_type decl_rtl_to_reset;
305 /* Number of uses recorded in DATA. */
307 static inline unsigned
308 n_iv_uses (struct ivopts_data *data)
310 return VARRAY_ACTIVE_SIZE (data->iv_uses);
313 /* Ith use recorded in DATA. */
315 static inline struct iv_use *
316 iv_use (struct ivopts_data *data, unsigned i)
318 return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
321 /* Number of candidates recorded in DATA. */
323 static inline unsigned
324 n_iv_cands (struct ivopts_data *data)
326 return VARRAY_ACTIVE_SIZE (data->iv_candidates);
329 /* Ith candidate recorded in DATA. */
331 static inline struct iv_cand *
332 iv_cand (struct ivopts_data *data, unsigned i)
334 return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
337 /* The data for LOOP. */
339 static inline struct loop_data *
340 loop_data (struct loop *loop)
342 return loop->aux;
345 /* The single loop exit if it dominates the latch, NULL otherwise. */
347 static edge
348 single_dom_exit (struct loop *loop)
350 edge exit = loop->single_exit;
352 if (!exit)
353 return NULL;
355 if (!just_once_each_iteration_p (loop, exit->src))
356 return NULL;
358 return exit;
361 /* Dumps information about the induction variable IV to FILE. */
363 extern void dump_iv (FILE *, struct iv *);
364 void
365 dump_iv (FILE *file, struct iv *iv)
367 if (iv->ssa_name)
369 fprintf (file, "ssa name ");
370 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
371 fprintf (file, "\n");
374 fprintf (file, " type ");
375 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
376 fprintf (file, "\n");
378 if (iv->step)
380 fprintf (file, " base ");
381 print_generic_expr (file, iv->base, TDF_SLIM);
382 fprintf (file, "\n");
384 fprintf (file, " step ");
385 print_generic_expr (file, iv->step, TDF_SLIM);
386 fprintf (file, "\n");
388 else
390 fprintf (file, " invariant ");
391 print_generic_expr (file, iv->base, TDF_SLIM);
392 fprintf (file, "\n");
395 if (iv->base_object)
397 fprintf (file, " base object ");
398 print_generic_expr (file, iv->base_object, TDF_SLIM);
399 fprintf (file, "\n");
402 if (iv->biv_p)
403 fprintf (file, " is a biv\n");
406 /* Dumps information about the USE to FILE. */
408 extern void dump_use (FILE *, struct iv_use *);
409 void
410 dump_use (FILE *file, struct iv_use *use)
412 fprintf (file, "use %d\n", use->id);
414 switch (use->type)
416 case USE_NONLINEAR_EXPR:
417 fprintf (file, " generic\n");
418 break;
420 case USE_OUTER:
421 fprintf (file, " outside\n");
422 break;
424 case USE_ADDRESS:
425 fprintf (file, " address\n");
426 break;
428 case USE_COMPARE:
429 fprintf (file, " compare\n");
430 break;
432 default:
433 gcc_unreachable ();
436 fprintf (file, " in statement ");
437 print_generic_expr (file, use->stmt, TDF_SLIM);
438 fprintf (file, "\n");
440 fprintf (file, " at position ");
441 if (use->op_p)
442 print_generic_expr (file, *use->op_p, TDF_SLIM);
443 fprintf (file, "\n");
445 dump_iv (file, use->iv);
447 fprintf (file, " related candidates ");
448 dump_bitmap (file, use->related_cands);
451 /* Dumps information about the uses to FILE. */
453 extern void dump_uses (FILE *, struct ivopts_data *);
454 void
455 dump_uses (FILE *file, struct ivopts_data *data)
457 unsigned i;
458 struct iv_use *use;
460 for (i = 0; i < n_iv_uses (data); i++)
462 use = iv_use (data, i);
464 dump_use (file, use);
465 fprintf (file, "\n");
469 /* Dumps information about induction variable candidate CAND to FILE. */
471 extern void dump_cand (FILE *, struct iv_cand *);
472 void
473 dump_cand (FILE *file, struct iv_cand *cand)
475 struct iv *iv = cand->iv;
477 fprintf (file, "candidate %d%s\n",
478 cand->id, cand->important ? " (important)" : "");
480 if (!iv)
482 fprintf (file, " final value replacement\n");
483 return;
486 switch (cand->pos)
488 case IP_NORMAL:
489 fprintf (file, " incremented before exit test\n");
490 break;
492 case IP_END:
493 fprintf (file, " incremented at end\n");
494 break;
496 case IP_ORIGINAL:
497 fprintf (file, " original biv\n");
498 break;
501 dump_iv (file, iv);
504 /* Returns the info for ssa version VER. */
506 static inline struct version_info *
507 ver_info (struct ivopts_data *data, unsigned ver)
509 return data->version_info + ver;
512 /* Returns the info for ssa name NAME. */
514 static inline struct version_info *
515 name_info (struct ivopts_data *data, tree name)
517 return ver_info (data, SSA_NAME_VERSION (name));
520 /* Checks whether there exists number X such that X * B = A, counting modulo
521 2^BITS. */
523 static bool
524 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
525 HOST_WIDE_INT *x)
527 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
528 unsigned HOST_WIDE_INT inv, ex, val;
529 unsigned i;
531 a &= mask;
532 b &= mask;
534 /* First divide the whole equation by 2 as long as possible. */
535 while (!(a & 1) && !(b & 1))
537 a >>= 1;
538 b >>= 1;
539 bits--;
540 mask >>= 1;
543 if (!(b & 1))
545 /* If b is still even, a is odd and there is no such x. */
546 return false;
549 /* Find the inverse of b. We compute it as
550 b^(2^(bits - 1) - 1) (mod 2^bits). */
551 inv = 1;
552 ex = b;
553 for (i = 0; i < bits - 1; i++)
555 inv = (inv * ex) & mask;
556 ex = (ex * ex) & mask;
559 val = (a * inv) & mask;
561 gcc_assert (((val * b) & mask) == a);
563 if ((val >> (bits - 1)) & 1)
564 val |= ~mask;
566 *x = val;
568 return true;
571 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
572 emitted in LOOP. */
574 static bool
575 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
577 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
579 gcc_assert (bb);
581 if (sbb == loop->latch)
582 return true;
584 if (sbb != bb)
585 return false;
587 return stmt == last_stmt (bb);
590 /* Returns true if STMT if after the place where the original induction
591 variable CAND is incremented. */
593 static bool
594 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
596 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
597 basic_block stmt_bb = bb_for_stmt (stmt);
598 block_stmt_iterator bsi;
600 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
601 return false;
603 if (stmt_bb != cand_bb)
604 return true;
606 /* Scan the block from the end, since the original ivs are usually
607 incremented at the end of the loop body. */
608 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
610 if (bsi_stmt (bsi) == cand->incremented_at)
611 return false;
612 if (bsi_stmt (bsi) == stmt)
613 return true;
617 /* Returns true if STMT if after the place where the induction variable
618 CAND is incremented in LOOP. */
620 static bool
621 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
623 switch (cand->pos)
625 case IP_END:
626 return false;
628 case IP_NORMAL:
629 return stmt_after_ip_normal_pos (loop, stmt);
631 case IP_ORIGINAL:
632 return stmt_after_ip_original_pos (cand, stmt);
634 default:
635 gcc_unreachable ();
639 /* Initializes data structures used by the iv optimization pass, stored
640 in DATA. LOOPS is the loop tree. */
642 static void
643 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
645 unsigned i;
647 data->version_info_size = 2 * num_ssa_names;
648 data->version_info = xcalloc (data->version_info_size,
649 sizeof (struct version_info));
650 data->relevant = BITMAP_XMALLOC ();
651 data->important_candidates = BITMAP_XMALLOC ();
652 data->max_inv_id = 0;
654 for (i = 1; i < loops->num; i++)
655 if (loops->parray[i])
656 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
658 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
659 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
660 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
663 /* Returns a memory object to that EXPR points. In case we are able to
664 determine that it does not point to any such object, NULL is returned. */
666 static tree
667 determine_base_object (tree expr)
669 enum tree_code code = TREE_CODE (expr);
670 tree base, obj, op0, op1;
672 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
673 return NULL_TREE;
675 switch (code)
677 case INTEGER_CST:
678 return NULL_TREE;
680 case ADDR_EXPR:
681 obj = TREE_OPERAND (expr, 0);
682 base = get_base_address (obj);
684 if (!base)
685 return fold_convert (ptr_type_node, expr);
687 if (TREE_CODE (base) == INDIRECT_REF)
688 return fold_convert (ptr_type_node, TREE_OPERAND (base, 0));
690 return fold (build1 (ADDR_EXPR, ptr_type_node, base));
692 case PLUS_EXPR:
693 case MINUS_EXPR:
694 op0 = determine_base_object (TREE_OPERAND (expr, 0));
695 op1 = determine_base_object (TREE_OPERAND (expr, 1));
697 if (!op1)
698 return op0;
700 if (!op0)
701 return (code == PLUS_EXPR
702 ? op1
703 : fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));
705 return fold (build (code, ptr_type_node, op0, op1));
707 default:
708 return fold_convert (ptr_type_node, expr);
712 /* Allocates an induction variable with given initial value BASE and step STEP
713 for loop LOOP. */
715 static struct iv *
716 alloc_iv (tree base, tree step)
718 struct iv *iv = xcalloc (1, sizeof (struct iv));
720 if (step && integer_zerop (step))
721 step = NULL_TREE;
723 iv->base = base;
724 iv->base_object = determine_base_object (base);
725 iv->step = step;
726 iv->biv_p = false;
727 iv->have_use_for = false;
728 iv->use_id = 0;
729 iv->ssa_name = NULL_TREE;
731 return iv;
734 /* Sets STEP and BASE for induction variable IV. */
736 static void
737 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
739 struct version_info *info = name_info (data, iv);
741 gcc_assert (!info->iv);
743 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
744 info->iv = alloc_iv (base, step);
745 info->iv->ssa_name = iv;
748 /* Finds induction variable declaration for VAR. */
750 static struct iv *
751 get_iv (struct ivopts_data *data, tree var)
753 basic_block bb;
755 if (!name_info (data, var)->iv)
757 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
759 if (!bb
760 || !flow_bb_inside_loop_p (data->current_loop, bb))
761 set_iv (data, var, var, NULL_TREE);
764 return name_info (data, var)->iv;
767 /* Determines the step of a biv defined in PHI. */
769 static tree
770 determine_biv_step (tree phi)
772 struct loop *loop = bb_for_stmt (phi)->loop_father;
773 tree name = PHI_RESULT (phi), base, step;
774 tree type = TREE_TYPE (name);
776 if (!is_gimple_reg (name))
777 return NULL_TREE;
779 if (!simple_iv (loop, phi, name, &base, &step))
780 return NULL_TREE;
782 if (!step)
783 return build_int_cst (type, 0);
785 return step;
788 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
790 static bool
791 abnormal_ssa_name_p (tree exp)
793 if (!exp)
794 return false;
796 if (TREE_CODE (exp) != SSA_NAME)
797 return false;
799 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
802 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
803 abnormal phi node. Callback for for_each_index. */
805 static bool
806 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
807 void *data ATTRIBUTE_UNUSED)
809 if (TREE_CODE (base) == ARRAY_REF)
811 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
812 return false;
813 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
814 return false;
817 return !abnormal_ssa_name_p (*index);
820 /* Returns true if EXPR contains a ssa name that occurs in an
821 abnormal phi node. */
823 static bool
824 contains_abnormal_ssa_name_p (tree expr)
826 enum tree_code code = TREE_CODE (expr);
827 enum tree_code_class class = TREE_CODE_CLASS (code);
829 if (code == SSA_NAME)
830 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
832 if (code == INTEGER_CST
833 || is_gimple_min_invariant (expr))
834 return false;
836 if (code == ADDR_EXPR)
837 return !for_each_index (&TREE_OPERAND (expr, 0),
838 idx_contains_abnormal_ssa_name_p,
839 NULL);
841 switch (class)
843 case tcc_binary:
844 case tcc_comparison:
845 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
846 return true;
848 /* Fallthru. */
849 case tcc_unary:
850 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
851 return true;
853 break;
855 default:
856 gcc_unreachable ();
859 return false;
862 /* Finds basic ivs. */
864 static bool
865 find_bivs (struct ivopts_data *data)
867 tree phi, step, type, base;
868 bool found = false;
869 struct loop *loop = data->current_loop;
871 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
873 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
874 continue;
876 step = determine_biv_step (phi);
878 if (!step)
879 continue;
880 if (cst_and_fits_in_hwi (step)
881 && int_cst_value (step) == 0)
882 continue;
884 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
885 if (contains_abnormal_ssa_name_p (base))
886 continue;
888 type = TREE_TYPE (PHI_RESULT (phi));
889 base = fold_convert (type, base);
890 step = fold_convert (type, step);
892 /* FIXME: We do not handle induction variables whose step does
893 not satisfy cst_and_fits_in_hwi. */
894 if (!cst_and_fits_in_hwi (step))
895 continue;
897 set_iv (data, PHI_RESULT (phi), base, step);
898 found = true;
901 return found;
904 /* Marks basic ivs. */
906 static void
907 mark_bivs (struct ivopts_data *data)
909 tree phi, var;
910 struct iv *iv, *incr_iv;
911 struct loop *loop = data->current_loop;
912 basic_block incr_bb;
914 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
916 iv = get_iv (data, PHI_RESULT (phi));
917 if (!iv)
918 continue;
920 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
921 incr_iv = get_iv (data, var);
922 if (!incr_iv)
923 continue;
925 /* If the increment is in the subloop, ignore it. */
926 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
927 if (incr_bb->loop_father != data->current_loop
928 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
929 continue;
931 iv->biv_p = true;
932 incr_iv->biv_p = true;
936 /* Checks whether STMT defines a linear induction variable and stores its
937 parameters to BASE and STEP. */
939 static bool
940 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
941 tree *base, tree *step)
943 tree lhs;
944 struct loop *loop = data->current_loop;
946 *base = NULL_TREE;
947 *step = NULL_TREE;
949 if (TREE_CODE (stmt) != MODIFY_EXPR)
950 return false;
952 lhs = TREE_OPERAND (stmt, 0);
953 if (TREE_CODE (lhs) != SSA_NAME)
954 return false;
956 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
957 return false;
959 /* FIXME: We do not handle induction variables whose step does
960 not satisfy cst_and_fits_in_hwi. */
961 if (!zero_p (*step)
962 && !cst_and_fits_in_hwi (*step))
963 return false;
965 if (contains_abnormal_ssa_name_p (*base))
966 return false;
968 return true;
971 /* Finds general ivs in statement STMT. */
973 static void
974 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
976 tree base, step;
978 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
979 return;
981 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
984 /* Finds general ivs in basic block BB. */
986 static void
987 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
989 block_stmt_iterator bsi;
991 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
992 find_givs_in_stmt (data, bsi_stmt (bsi));
995 /* Finds general ivs. */
997 static void
998 find_givs (struct ivopts_data *data)
1000 struct loop *loop = data->current_loop;
1001 basic_block *body = get_loop_body_in_dom_order (loop);
1002 unsigned i;
1004 for (i = 0; i < loop->num_nodes; i++)
1005 find_givs_in_bb (data, body[i]);
1006 free (body);
1009 /* Determine the number of iterations of the current loop. */
1011 static void
1012 determine_number_of_iterations (struct ivopts_data *data)
1014 struct loop *loop = data->current_loop;
1015 edge exit = single_dom_exit (loop);
1017 if (!exit)
1018 return;
1020 number_of_iterations_exit (loop, exit, &loop_data (loop)->niter);
1023 /* For each ssa name defined in LOOP determines whether it is an induction
1024 variable and if so, its initial value and step. */
1026 static bool
1027 find_induction_variables (struct ivopts_data *data)
1029 unsigned i;
1030 struct loop *loop = data->current_loop;
1031 bitmap_iterator bi;
1033 if (!find_bivs (data))
1034 return false;
1036 find_givs (data);
1037 mark_bivs (data);
1038 determine_number_of_iterations (data);
1040 if (dump_file && (dump_flags & TDF_DETAILS))
1042 if (loop_data (loop)->niter.niter)
1044 fprintf (dump_file, " number of iterations ");
1045 print_generic_expr (dump_file, loop_data (loop)->niter.niter,
1046 TDF_SLIM);
1047 fprintf (dump_file, "\n");
1049 fprintf (dump_file, " may be zero if ");
1050 print_generic_expr (dump_file, loop_data (loop)->niter.may_be_zero,
1051 TDF_SLIM);
1052 fprintf (dump_file, "\n");
1054 fprintf (dump_file, " bogus unless ");
1055 print_generic_expr (dump_file, loop_data (loop)->niter.assumptions,
1056 TDF_SLIM);
1057 fprintf (dump_file, "\n");
1058 fprintf (dump_file, "\n");
1061 fprintf (dump_file, "Induction variables:\n\n");
1063 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1065 if (ver_info (data, i)->iv)
1066 dump_iv (dump_file, ver_info (data, i)->iv);
1070 return true;
1073 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1075 static struct iv_use *
1076 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1077 tree stmt, enum use_type use_type)
1079 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1081 use->id = n_iv_uses (data);
1082 use->type = use_type;
1083 use->iv = iv;
1084 use->stmt = stmt;
1085 use->op_p = use_p;
1086 use->related_cands = BITMAP_XMALLOC ();
1088 /* To avoid showing ssa name in the dumps, if it was not reset by the
1089 caller. */
1090 iv->ssa_name = NULL_TREE;
1092 if (dump_file && (dump_flags & TDF_DETAILS))
1093 dump_use (dump_file, use);
1095 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
1097 return use;
1100 /* Checks whether OP is a loop-level invariant and if so, records it.
1101 NONLINEAR_USE is true if the invariant is used in a way we do not
1102 handle specially. */
1104 static void
1105 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1107 basic_block bb;
1108 struct version_info *info;
1110 if (TREE_CODE (op) != SSA_NAME
1111 || !is_gimple_reg (op))
1112 return;
1114 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1115 if (bb
1116 && flow_bb_inside_loop_p (data->current_loop, bb))
1117 return;
1119 info = name_info (data, op);
1120 info->name = op;
1121 info->has_nonlin_use |= nonlinear_use;
1122 if (!info->inv_id)
1123 info->inv_id = ++data->max_inv_id;
1124 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1127 /* Checks whether the use OP is interesting and if so, records it
1128 as TYPE. */
1130 static struct iv_use *
1131 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1132 enum use_type type)
1134 struct iv *iv;
1135 struct iv *civ;
1136 tree stmt;
1137 struct iv_use *use;
1139 if (TREE_CODE (op) != SSA_NAME)
1140 return NULL;
1142 iv = get_iv (data, op);
1143 if (!iv)
1144 return NULL;
1146 if (iv->have_use_for)
1148 use = iv_use (data, iv->use_id);
1150 gcc_assert (use->type == USE_NONLINEAR_EXPR
1151 || use->type == USE_OUTER);
1153 if (type == USE_NONLINEAR_EXPR)
1154 use->type = USE_NONLINEAR_EXPR;
1155 return use;
1158 if (zero_p (iv->step))
1160 record_invariant (data, op, true);
1161 return NULL;
1163 iv->have_use_for = true;
1165 civ = xmalloc (sizeof (struct iv));
1166 *civ = *iv;
1168 stmt = SSA_NAME_DEF_STMT (op);
1169 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1170 || TREE_CODE (stmt) == MODIFY_EXPR);
1172 use = record_use (data, NULL, civ, stmt, type);
1173 iv->use_id = use->id;
1175 return use;
1178 /* Checks whether the use OP is interesting and if so, records it. */
1180 static struct iv_use *
1181 find_interesting_uses_op (struct ivopts_data *data, tree op)
1183 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1186 /* Records a definition of induction variable OP that is used outside of the
1187 loop. */
1189 static struct iv_use *
1190 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1192 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1195 /* Checks whether the condition *COND_P in STMT is interesting
1196 and if so, records it. */
1198 static void
1199 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1201 tree *op0_p;
1202 tree *op1_p;
1203 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1204 struct iv const_iv;
1205 tree zero = integer_zero_node;
1207 const_iv.step = NULL_TREE;
1209 if (integer_zerop (*cond_p)
1210 || integer_nonzerop (*cond_p))
1211 return;
1213 if (TREE_CODE (*cond_p) == SSA_NAME)
1215 op0_p = cond_p;
1216 op1_p = &zero;
1218 else
1220 op0_p = &TREE_OPERAND (*cond_p, 0);
1221 op1_p = &TREE_OPERAND (*cond_p, 1);
1224 if (TREE_CODE (*op0_p) == SSA_NAME)
1225 iv0 = get_iv (data, *op0_p);
1226 else
1227 iv0 = &const_iv;
1229 if (TREE_CODE (*op1_p) == SSA_NAME)
1230 iv1 = get_iv (data, *op1_p);
1231 else
1232 iv1 = &const_iv;
1234 if (/* When comparing with non-invariant value, we may not do any senseful
1235 induction variable elimination. */
1236 (!iv0 || !iv1)
1237 /* Eliminating condition based on two ivs would be nontrivial.
1238 ??? TODO -- it is not really important to handle this case. */
1239 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1241 find_interesting_uses_op (data, *op0_p);
1242 find_interesting_uses_op (data, *op1_p);
1243 return;
1246 if (zero_p (iv0->step) && zero_p (iv1->step))
1248 /* If both are invariants, this is a work for unswitching. */
1249 return;
1252 civ = xmalloc (sizeof (struct iv));
1253 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1254 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1257 /* Returns true if expression EXPR is obviously invariant in LOOP,
1258 i.e. if all its operands are defined outside of the LOOP. */
1260 bool
1261 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1263 basic_block def_bb;
1264 unsigned i, len;
1266 if (is_gimple_min_invariant (expr))
1267 return true;
1269 if (TREE_CODE (expr) == SSA_NAME)
1271 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1272 if (def_bb
1273 && flow_bb_inside_loop_p (loop, def_bb))
1274 return false;
1276 return true;
1279 if (!EXPR_P (expr))
1280 return false;
1282 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1283 for (i = 0; i < len; i++)
1284 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1285 return false;
1287 return true;
1290 /* Cumulates the steps of indices into DATA and replaces their values with the
1291 initial ones. Returns false when the value of the index cannot be determined.
1292 Callback for for_each_index. */
1294 struct ifs_ivopts_data
1296 struct ivopts_data *ivopts_data;
1297 tree stmt;
1298 tree *step_p;
1301 static bool
1302 idx_find_step (tree base, tree *idx, void *data)
1304 struct ifs_ivopts_data *dta = data;
1305 struct iv *iv;
1306 tree step, type, iv_type, iv_step, lbound, off;
1307 struct loop *loop = dta->ivopts_data->current_loop;
1309 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1310 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1311 return false;
1313 /* If base is a component ref, require that the offset of the reference
1314 be invariant. */
1315 if (TREE_CODE (base) == COMPONENT_REF)
1317 off = component_ref_field_offset (base);
1318 return expr_invariant_in_loop_p (loop, off);
1321 /* If base is array, first check whether we will be able to move the
1322 reference out of the loop (in order to take its address in strength
1323 reduction). In order for this to work we need both lower bound
1324 and step to be loop invariants. */
1325 if (TREE_CODE (base) == ARRAY_REF)
1327 step = array_ref_element_size (base);
1328 lbound = array_ref_low_bound (base);
1330 if (!expr_invariant_in_loop_p (loop, step)
1331 || !expr_invariant_in_loop_p (loop, lbound))
1332 return false;
1335 if (TREE_CODE (*idx) != SSA_NAME)
1336 return true;
1338 iv = get_iv (dta->ivopts_data, *idx);
1339 if (!iv)
1340 return false;
1342 *idx = iv->base;
1344 if (!iv->step)
1345 return true;
1347 iv_type = TREE_TYPE (iv->base);
1348 type = build_pointer_type (TREE_TYPE (base));
1349 if (TREE_CODE (base) == ARRAY_REF)
1351 step = array_ref_element_size (base);
1353 /* We only handle addresses whose step is an integer constant. */
1354 if (TREE_CODE (step) != INTEGER_CST)
1355 return false;
1357 else
1358 /* The step for pointer arithmetics already is 1 byte. */
1359 step = build_int_cst (type, 1);
1361 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1362 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1363 type, iv->base, iv->step, dta->stmt);
1364 else
1365 iv_step = fold_convert (iv_type, iv->step);
1367 if (!iv_step)
1369 /* The index might wrap. */
1370 return false;
1373 step = fold_binary_to_constant (MULT_EXPR, type, step, iv_step);
1375 if (!*dta->step_p)
1376 *dta->step_p = step;
1377 else
1378 *dta->step_p = fold_binary_to_constant (PLUS_EXPR, type,
1379 *dta->step_p, step);
1381 return true;
1384 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1385 object is passed to it in DATA. */
1387 static bool
1388 idx_record_use (tree base, tree *idx,
1389 void *data)
1391 find_interesting_uses_op (data, *idx);
1392 if (TREE_CODE (base) == ARRAY_REF)
1394 find_interesting_uses_op (data, array_ref_element_size (base));
1395 find_interesting_uses_op (data, array_ref_low_bound (base));
1397 return true;
1400 /* Finds addresses in *OP_P inside STMT. */
1402 static void
1403 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1405 tree base = unshare_expr (*op_p), step = NULL;
1406 struct iv *civ;
1407 struct ifs_ivopts_data ifs_ivopts_data;
1409 /* Ignore bitfields for now. Not really something terribly complicated
1410 to handle. TODO. */
1411 if (TREE_CODE (base) == COMPONENT_REF
1412 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1413 goto fail;
1415 ifs_ivopts_data.ivopts_data = data;
1416 ifs_ivopts_data.stmt = stmt;
1417 ifs_ivopts_data.step_p = &step;
1418 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1419 || zero_p (step))
1420 goto fail;
1422 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1423 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1425 if (TREE_CODE (base) == INDIRECT_REF)
1426 base = TREE_OPERAND (base, 0);
1427 else
1428 base = build_addr (base);
1430 civ = alloc_iv (base, step);
1431 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1432 return;
1434 fail:
1435 for_each_index (op_p, idx_record_use, data);
1438 /* Finds and records invariants used in STMT. */
1440 static void
1441 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1443 use_optype uses = NULL;
1444 unsigned i, n;
1445 tree op;
1447 if (TREE_CODE (stmt) == PHI_NODE)
1448 n = PHI_NUM_ARGS (stmt);
1449 else
1451 get_stmt_operands (stmt);
1452 uses = STMT_USE_OPS (stmt);
1453 n = NUM_USES (uses);
1456 for (i = 0; i < n; i++)
1458 if (TREE_CODE (stmt) == PHI_NODE)
1459 op = PHI_ARG_DEF (stmt, i);
1460 else
1461 op = USE_OP (uses, i);
1463 record_invariant (data, op, false);
1467 /* Finds interesting uses of induction variables in the statement STMT. */
1469 static void
1470 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1472 struct iv *iv;
1473 tree op, lhs, rhs;
1474 use_optype uses = NULL;
1475 unsigned i, n;
1477 find_invariants_stmt (data, stmt);
1479 if (TREE_CODE (stmt) == COND_EXPR)
1481 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1482 return;
1485 if (TREE_CODE (stmt) == MODIFY_EXPR)
1487 lhs = TREE_OPERAND (stmt, 0);
1488 rhs = TREE_OPERAND (stmt, 1);
1490 if (TREE_CODE (lhs) == SSA_NAME)
1492 /* If the statement defines an induction variable, the uses are not
1493 interesting by themselves. */
1495 iv = get_iv (data, lhs);
1497 if (iv && !zero_p (iv->step))
1498 return;
1501 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1503 case tcc_comparison:
1504 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1505 return;
1507 case tcc_reference:
1508 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1509 if (REFERENCE_CLASS_P (lhs))
1510 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1511 return;
1513 default: ;
1516 if (REFERENCE_CLASS_P (lhs)
1517 && is_gimple_val (rhs))
1519 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1520 find_interesting_uses_op (data, rhs);
1521 return;
1524 /* TODO -- we should also handle address uses of type
1526 memory = call (whatever);
1530 call (memory). */
1533 if (TREE_CODE (stmt) == PHI_NODE
1534 && bb_for_stmt (stmt) == data->current_loop->header)
1536 lhs = PHI_RESULT (stmt);
1537 iv = get_iv (data, lhs);
1539 if (iv && !zero_p (iv->step))
1540 return;
1543 if (TREE_CODE (stmt) == PHI_NODE)
1544 n = PHI_NUM_ARGS (stmt);
1545 else
1547 uses = STMT_USE_OPS (stmt);
1548 n = NUM_USES (uses);
1551 for (i = 0; i < n; i++)
1553 if (TREE_CODE (stmt) == PHI_NODE)
1554 op = PHI_ARG_DEF (stmt, i);
1555 else
1556 op = USE_OP (uses, i);
1558 if (TREE_CODE (op) != SSA_NAME)
1559 continue;
1561 iv = get_iv (data, op);
1562 if (!iv)
1563 continue;
1565 find_interesting_uses_op (data, op);
1569 /* Finds interesting uses of induction variables outside of loops
1570 on loop exit edge EXIT. */
1572 static void
1573 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1575 tree phi, def;
1577 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1579 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1580 find_interesting_uses_outer (data, def);
1584 /* Finds uses of the induction variables that are interesting. */
1586 static void
1587 find_interesting_uses (struct ivopts_data *data)
1589 basic_block bb;
1590 block_stmt_iterator bsi;
1591 tree phi;
1592 basic_block *body = get_loop_body (data->current_loop);
1593 unsigned i;
1594 struct version_info *info;
1595 edge e;
1597 if (dump_file && (dump_flags & TDF_DETAILS))
1598 fprintf (dump_file, "Uses:\n\n");
1600 for (i = 0; i < data->current_loop->num_nodes; i++)
1602 edge_iterator ei;
1603 bb = body[i];
1605 FOR_EACH_EDGE (e, ei, bb->succs)
1606 if (e->dest != EXIT_BLOCK_PTR
1607 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1608 find_interesting_uses_outside (data, e);
1610 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1611 find_interesting_uses_stmt (data, phi);
1612 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1613 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1616 if (dump_file && (dump_flags & TDF_DETAILS))
1618 bitmap_iterator bi;
1620 fprintf (dump_file, "\n");
1622 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1624 info = ver_info (data, i);
1625 if (info->inv_id)
1627 fprintf (dump_file, " ");
1628 print_generic_expr (dump_file, info->name, TDF_SLIM);
1629 fprintf (dump_file, " is invariant (%d)%s\n",
1630 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1634 fprintf (dump_file, "\n");
1637 free (body);
1640 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1641 position to POS. If USE is not NULL, the candidate is set as related to
1642 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1643 replacement of the final value of the iv by a direct computation. */
1645 static struct iv_cand *
1646 add_candidate_1 (struct ivopts_data *data,
1647 tree base, tree step, bool important, enum iv_position pos,
1648 struct iv_use *use, tree incremented_at)
1650 unsigned i;
1651 struct iv_cand *cand = NULL;
1652 tree type;
1654 if (base)
1656 type = TREE_TYPE (base);
1657 if (!TYPE_UNSIGNED (type))
1659 type = unsigned_type_for (type);
1660 base = fold_convert (type, base);
1661 if (step)
1662 step = fold_convert (type, step);
1666 for (i = 0; i < n_iv_cands (data); i++)
1668 cand = iv_cand (data, i);
1670 if (cand->pos != pos)
1671 continue;
1673 if (cand->incremented_at != incremented_at)
1674 continue;
1676 if (!cand->iv)
1678 if (!base && !step)
1679 break;
1681 continue;
1684 if (!base && !step)
1685 continue;
1687 if (!operand_equal_p (base, cand->iv->base, 0))
1688 continue;
1690 if (zero_p (cand->iv->step))
1692 if (zero_p (step))
1693 break;
1695 else
1697 if (step && operand_equal_p (step, cand->iv->step, 0))
1698 break;
1702 if (i == n_iv_cands (data))
1704 cand = xcalloc (1, sizeof (struct iv_cand));
1705 cand->id = i;
1707 if (!base && !step)
1708 cand->iv = NULL;
1709 else
1710 cand->iv = alloc_iv (base, step);
1712 cand->pos = pos;
1713 if (pos != IP_ORIGINAL && cand->iv)
1715 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1716 cand->var_after = cand->var_before;
1718 cand->important = important;
1719 cand->incremented_at = incremented_at;
1720 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1722 if (dump_file && (dump_flags & TDF_DETAILS))
1723 dump_cand (dump_file, cand);
1726 if (important && !cand->important)
1728 cand->important = true;
1729 if (dump_file && (dump_flags & TDF_DETAILS))
1730 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1733 if (use)
1735 bitmap_set_bit (use->related_cands, i);
1736 if (dump_file && (dump_flags & TDF_DETAILS))
1737 fprintf (dump_file, "Candidate %d is related to use %d\n",
1738 cand->id, use->id);
1741 return cand;
1744 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1745 position to POS. If USE is not NULL, the candidate is set as related to
1746 it. The candidate computation is scheduled on all available positions. */
1748 static void
1749 add_candidate (struct ivopts_data *data,
1750 tree base, tree step, bool important, struct iv_use *use)
1752 if (ip_normal_pos (data->current_loop))
1753 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
1754 if (ip_end_pos (data->current_loop))
1755 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
1758 /* Adds standard iv candidates. */
1760 static void
1761 add_standard_iv_candidates (struct ivopts_data *data)
1763 /* Add 0 + 1 * iteration candidate. */
1764 add_candidate (data,
1765 build_int_cst (unsigned_intSI_type_node, 0),
1766 build_int_cst (unsigned_intSI_type_node, 1),
1767 true, NULL);
1769 /* The same for a long type if it is still fast enough. */
1770 if (BITS_PER_WORD > 32)
1771 add_candidate (data,
1772 build_int_cst (unsigned_intDI_type_node, 0),
1773 build_int_cst (unsigned_intDI_type_node, 1),
1774 true, NULL);
1778 /* Adds candidates bases on the old induction variable IV. */
1780 static void
1781 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
1783 tree phi, def;
1784 struct iv_cand *cand;
1786 add_candidate (data, iv->base, iv->step, true, NULL);
1788 /* The same, but with initial value zero. */
1789 add_candidate (data,
1790 build_int_cst (TREE_TYPE (iv->base), 0),
1791 iv->step, true, NULL);
1793 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
1794 if (TREE_CODE (phi) == PHI_NODE)
1796 /* Additionally record the possibility of leaving the original iv
1797 untouched. */
1798 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
1799 cand = add_candidate_1 (data,
1800 iv->base, iv->step, true, IP_ORIGINAL, NULL,
1801 SSA_NAME_DEF_STMT (def));
1802 cand->var_before = iv->ssa_name;
1803 cand->var_after = def;
1807 /* Adds candidates based on the old induction variables. */
1809 static void
1810 add_old_ivs_candidates (struct ivopts_data *data)
1812 unsigned i;
1813 struct iv *iv;
1814 bitmap_iterator bi;
1816 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1818 iv = ver_info (data, i)->iv;
1819 if (iv && iv->biv_p && !zero_p (iv->step))
1820 add_old_iv_candidates (data, iv);
1824 /* Adds candidates based on the value of the induction variable IV and USE. */
1826 static void
1827 add_iv_value_candidates (struct ivopts_data *data,
1828 struct iv *iv, struct iv_use *use)
1830 add_candidate (data, iv->base, iv->step, false, use);
1832 /* The same, but with initial value zero. */
1833 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
1834 iv->step, false, use);
1837 /* Adds candidates based on the address IV and USE. */
1839 static void
1840 add_address_candidates (struct ivopts_data *data,
1841 struct iv *iv, struct iv_use *use)
1843 tree base, abase, tmp, *act;
1845 /* First, the trivial choices. */
1846 add_iv_value_candidates (data, iv, use);
1848 /* Second, try removing the COMPONENT_REFs. */
1849 if (TREE_CODE (iv->base) == ADDR_EXPR)
1851 base = TREE_OPERAND (iv->base, 0);
1852 while (TREE_CODE (base) == COMPONENT_REF
1853 || (TREE_CODE (base) == ARRAY_REF
1854 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
1855 base = TREE_OPERAND (base, 0);
1857 if (base != TREE_OPERAND (iv->base, 0))
1859 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1860 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1862 if (TREE_CODE (base) == INDIRECT_REF)
1863 base = TREE_OPERAND (base, 0);
1864 else
1865 base = build_addr (base);
1866 add_candidate (data, base, iv->step, false, use);
1870 /* Third, try removing the constant offset. */
1871 abase = iv->base;
1872 while (TREE_CODE (abase) == PLUS_EXPR
1873 && TREE_CODE (TREE_OPERAND (abase, 1)) != INTEGER_CST)
1874 abase = TREE_OPERAND (abase, 0);
1875 /* We found the offset, so make the copy of the non-shared part and
1876 remove it. */
1877 if (TREE_CODE (abase) == PLUS_EXPR)
1879 tmp = iv->base;
1880 act = &base;
1882 for (tmp = iv->base; tmp != abase; tmp = TREE_OPERAND (tmp, 0))
1884 *act = build2 (PLUS_EXPR, TREE_TYPE (tmp),
1885 NULL_TREE, TREE_OPERAND (tmp, 1));
1886 act = &TREE_OPERAND (*act, 0);
1888 *act = TREE_OPERAND (tmp, 0);
1890 add_candidate (data, base, iv->step, false, use);
1894 /* Possibly adds pseudocandidate for replacing the final value of USE by
1895 a direct computation. */
1897 static void
1898 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
1900 struct tree_niter_desc *niter;
1901 struct loop *loop = data->current_loop;
1903 /* We must know where we exit the loop and how many times does it roll. */
1904 if (!single_dom_exit (loop))
1905 return;
1907 niter = &loop_data (loop)->niter;
1908 if (!niter->niter
1909 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
1910 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
1911 return;
1913 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
1916 /* Adds candidates based on the uses. */
1918 static void
1919 add_derived_ivs_candidates (struct ivopts_data *data)
1921 unsigned i;
1923 for (i = 0; i < n_iv_uses (data); i++)
1925 struct iv_use *use = iv_use (data, i);
1927 if (!use)
1928 continue;
1930 switch (use->type)
1932 case USE_NONLINEAR_EXPR:
1933 case USE_COMPARE:
1934 /* Just add the ivs based on the value of the iv used here. */
1935 add_iv_value_candidates (data, use->iv, use);
1936 break;
1938 case USE_OUTER:
1939 add_iv_value_candidates (data, use->iv, use);
1941 /* Additionally, add the pseudocandidate for the possibility to
1942 replace the final value by a direct computation. */
1943 add_iv_outer_candidates (data, use);
1944 break;
1946 case USE_ADDRESS:
1947 add_address_candidates (data, use->iv, use);
1948 break;
1950 default:
1951 gcc_unreachable ();
1956 /* Record important candidates and add them to related_cands bitmaps
1957 if needed. */
1959 static void
1960 record_important_candidates (struct ivopts_data *data)
1962 unsigned i;
1963 struct iv_use *use;
1965 for (i = 0; i < n_iv_cands (data); i++)
1967 struct iv_cand *cand = iv_cand (data, i);
1969 if (cand->important)
1970 bitmap_set_bit (data->important_candidates, i);
1973 data->consider_all_candidates = (n_iv_cands (data)
1974 <= CONSIDER_ALL_CANDIDATES_BOUND);
1976 if (data->consider_all_candidates)
1978 /* We will not need "related_cands" bitmaps in this case,
1979 so release them to decrease peak memory consumption. */
1980 for (i = 0; i < n_iv_uses (data); i++)
1982 use = iv_use (data, i);
1983 BITMAP_XFREE (use->related_cands);
1986 else
1988 /* Add important candidates to the related_cands bitmaps. */
1989 for (i = 0; i < n_iv_uses (data); i++)
1990 bitmap_ior_into (iv_use (data, i)->related_cands,
1991 data->important_candidates);
1995 /* Finds the candidates for the induction variables. */
1997 static void
1998 find_iv_candidates (struct ivopts_data *data)
2000 /* Add commonly used ivs. */
2001 add_standard_iv_candidates (data);
2003 /* Add old induction variables. */
2004 add_old_ivs_candidates (data);
2006 /* Add induction variables derived from uses. */
2007 add_derived_ivs_candidates (data);
2009 /* Record the important candidates. */
2010 record_important_candidates (data);
2013 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2014 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2015 we allocate a simple list to every use. */
2017 static void
2018 alloc_use_cost_map (struct ivopts_data *data)
2020 unsigned i, size, s, j;
2022 for (i = 0; i < n_iv_uses (data); i++)
2024 struct iv_use *use = iv_use (data, i);
2025 bitmap_iterator bi;
2027 if (data->consider_all_candidates)
2028 size = n_iv_cands (data);
2029 else
2031 s = 0;
2032 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2034 s++;
2037 /* Round up to the power of two, so that moduling by it is fast. */
2038 for (size = 1; size < s; size <<= 1)
2039 continue;
2042 use->n_map_members = size;
2043 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2047 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2048 on invariants DEPENDS_ON. */
2050 static void
2051 set_use_iv_cost (struct ivopts_data *data,
2052 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2053 bitmap depends_on)
2055 unsigned i, s;
2057 if (cost == INFTY)
2059 BITMAP_XFREE (depends_on);
2060 return;
2063 if (data->consider_all_candidates)
2065 use->cost_map[cand->id].cand = cand;
2066 use->cost_map[cand->id].cost = cost;
2067 use->cost_map[cand->id].depends_on = depends_on;
2068 return;
2071 /* n_map_members is a power of two, so this computes modulo. */
2072 s = cand->id & (use->n_map_members - 1);
2073 for (i = s; i < use->n_map_members; i++)
2074 if (!use->cost_map[i].cand)
2075 goto found;
2076 for (i = 0; i < s; i++)
2077 if (!use->cost_map[i].cand)
2078 goto found;
2080 gcc_unreachable ();
2082 found:
2083 use->cost_map[i].cand = cand;
2084 use->cost_map[i].cost = cost;
2085 use->cost_map[i].depends_on = depends_on;
2088 /* Gets cost of (USE, CANDIDATE) pair. */
2090 static struct cost_pair *
2091 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2092 struct iv_cand *cand)
2094 unsigned i, s;
2095 struct cost_pair *ret;
2097 if (!cand)
2098 return NULL;
2100 if (data->consider_all_candidates)
2102 ret = use->cost_map + cand->id;
2103 if (!ret->cand)
2104 return NULL;
2106 return ret;
2109 /* n_map_members is a power of two, so this computes modulo. */
2110 s = cand->id & (use->n_map_members - 1);
2111 for (i = s; i < use->n_map_members; i++)
2112 if (use->cost_map[i].cand == cand)
2113 return use->cost_map + i;
2115 for (i = 0; i < s; i++)
2116 if (use->cost_map[i].cand == cand)
2117 return use->cost_map + i;
2119 return NULL;
2122 /* Returns estimate on cost of computing SEQ. */
2124 static unsigned
2125 seq_cost (rtx seq)
2127 unsigned cost = 0;
2128 rtx set;
2130 for (; seq; seq = NEXT_INSN (seq))
2132 set = single_set (seq);
2133 if (set)
2134 cost += rtx_cost (set, SET);
2135 else
2136 cost++;
2139 return cost;
2142 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2143 static rtx
2144 produce_memory_decl_rtl (tree obj, int *regno)
2146 rtx x;
2147 if (!obj)
2148 abort ();
2149 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2151 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2152 x = gen_rtx_SYMBOL_REF (Pmode, name);
2154 else
2155 x = gen_raw_REG (Pmode, (*regno)++);
2157 return gen_rtx_MEM (DECL_MODE (obj), x);
2160 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2161 walk_tree. DATA contains the actual fake register number. */
2163 static tree
2164 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2166 tree obj = NULL_TREE;
2167 rtx x = NULL_RTX;
2168 int *regno = data;
2170 switch (TREE_CODE (*expr_p))
2172 case ADDR_EXPR:
2173 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2174 handled_component_p (*expr_p);
2175 expr_p = &TREE_OPERAND (*expr_p, 0))
2176 continue;
2177 obj = *expr_p;
2178 if (DECL_P (obj))
2179 x = produce_memory_decl_rtl (obj, regno);
2180 break;
2182 case SSA_NAME:
2183 *ws = 0;
2184 obj = SSA_NAME_VAR (*expr_p);
2185 if (!DECL_RTL_SET_P (obj))
2186 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2187 break;
2189 case VAR_DECL:
2190 case PARM_DECL:
2191 case RESULT_DECL:
2192 *ws = 0;
2193 obj = *expr_p;
2195 if (DECL_RTL_SET_P (obj))
2196 break;
2198 if (DECL_MODE (obj) == BLKmode)
2199 x = produce_memory_decl_rtl (obj, regno);
2200 else
2201 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2203 break;
2205 default:
2206 break;
2209 if (x)
2211 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
2212 SET_DECL_RTL (obj, x);
2215 return NULL_TREE;
2218 /* Determines cost of the computation of EXPR. */
2220 static unsigned
2221 computation_cost (tree expr)
2223 rtx seq, rslt;
2224 tree type = TREE_TYPE (expr);
2225 unsigned cost;
2226 int regno = 0;
2228 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2229 start_sequence ();
2230 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2231 seq = get_insns ();
2232 end_sequence ();
2234 cost = seq_cost (seq);
2235 if (GET_CODE (rslt) == MEM)
2236 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2238 return cost;
2241 /* Returns variable containing the value of candidate CAND at statement AT. */
2243 static tree
2244 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2246 if (stmt_after_increment (loop, cand, stmt))
2247 return cand->var_after;
2248 else
2249 return cand->var_before;
2252 /* Determines the expression by that USE is expressed from induction variable
2253 CAND at statement AT in LOOP. */
2255 static tree
2256 get_computation_at (struct loop *loop,
2257 struct iv_use *use, struct iv_cand *cand, tree at)
2259 tree ubase = use->iv->base;
2260 tree ustep = use->iv->step;
2261 tree cbase = cand->iv->base;
2262 tree cstep = cand->iv->step;
2263 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2264 tree uutype;
2265 tree expr, delta;
2266 tree ratio;
2267 unsigned HOST_WIDE_INT ustepi, cstepi;
2268 HOST_WIDE_INT ratioi;
2270 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2272 /* We do not have a precision to express the values of use. */
2273 return NULL_TREE;
2276 expr = var_at_stmt (loop, cand, at);
2278 if (TREE_TYPE (expr) != ctype)
2280 /* This may happen with the original ivs. */
2281 expr = fold_convert (ctype, expr);
2284 if (TYPE_UNSIGNED (utype))
2285 uutype = utype;
2286 else
2288 uutype = unsigned_type_for (utype);
2289 ubase = fold_convert (uutype, ubase);
2290 ustep = fold_convert (uutype, ustep);
2293 if (uutype != ctype)
2295 expr = fold_convert (uutype, expr);
2296 cbase = fold_convert (uutype, cbase);
2297 cstep = fold_convert (uutype, cstep);
2300 if (!cst_and_fits_in_hwi (cstep)
2301 || !cst_and_fits_in_hwi (ustep))
2302 return NULL_TREE;
2304 ustepi = int_cst_value (ustep);
2305 cstepi = int_cst_value (cstep);
2307 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2309 /* TODO maybe consider case when ustep divides cstep and the ratio is
2310 a power of 2 (so that the division is fast to execute)? We would
2311 need to be much more careful with overflows etc. then. */
2312 return NULL_TREE;
2315 /* We may need to shift the value if we are after the increment. */
2316 if (stmt_after_increment (loop, cand, at))
2317 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2319 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2320 or |ratio| == 1, it is better to handle this like
2322 ubase - ratio * cbase + ratio * var. */
2324 if (ratioi == 1)
2326 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2327 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2329 else if (ratioi == -1)
2331 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2332 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2334 else if (TREE_CODE (cbase) == INTEGER_CST)
2336 ratio = build_int_cst_type (uutype, ratioi);
2337 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2338 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2339 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2340 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2342 else
2344 expr = fold (build2 (MINUS_EXPR, uutype, expr, cbase));
2345 ratio = build_int_cst_type (uutype, ratioi);
2346 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2347 expr = fold (build2 (PLUS_EXPR, uutype, ubase, expr));
2350 return fold_convert (utype, expr);
2353 /* Determines the expression by that USE is expressed from induction variable
2354 CAND in LOOP. */
2356 static tree
2357 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2359 return get_computation_at (loop, use, cand, use->stmt);
2362 /* Strips constant offsets from EXPR and adds them to OFFSET. */
2364 static void
2365 strip_offset (tree *expr, unsigned HOST_WIDE_INT *offset)
2367 tree op0, op1;
2368 enum tree_code code;
2370 while (1)
2372 if (cst_and_fits_in_hwi (*expr))
2374 *offset += int_cst_value (*expr);
2375 *expr = integer_zero_node;
2376 return;
2379 code = TREE_CODE (*expr);
2381 if (code != PLUS_EXPR && code != MINUS_EXPR)
2382 return;
2384 op0 = TREE_OPERAND (*expr, 0);
2385 op1 = TREE_OPERAND (*expr, 1);
2387 if (cst_and_fits_in_hwi (op1))
2389 if (code == PLUS_EXPR)
2390 *offset += int_cst_value (op1);
2391 else
2392 *offset -= int_cst_value (op1);
2394 *expr = op0;
2395 continue;
2398 if (code != PLUS_EXPR)
2399 return;
2401 if (!cst_and_fits_in_hwi (op0))
2402 return;
2404 *offset += int_cst_value (op0);
2405 *expr = op1;
2409 /* Returns cost of addition in MODE. */
2411 static unsigned
2412 add_cost (enum machine_mode mode)
2414 static unsigned costs[NUM_MACHINE_MODES];
2415 rtx seq;
2416 unsigned cost;
2418 if (costs[mode])
2419 return costs[mode];
2421 start_sequence ();
2422 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2423 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2424 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2425 NULL_RTX);
2426 seq = get_insns ();
2427 end_sequence ();
2429 cost = seq_cost (seq);
2430 if (!cost)
2431 cost = 1;
2433 costs[mode] = cost;
2435 if (dump_file && (dump_flags & TDF_DETAILS))
2436 fprintf (dump_file, "Addition in %s costs %d\n",
2437 GET_MODE_NAME (mode), cost);
2438 return cost;
2441 /* Entry in a hashtable of already known costs for multiplication. */
2442 struct mbc_entry
2444 HOST_WIDE_INT cst; /* The constant to multiply by. */
2445 enum machine_mode mode; /* In mode. */
2446 unsigned cost; /* The cost. */
2449 /* Counts hash value for the ENTRY. */
2451 static hashval_t
2452 mbc_entry_hash (const void *entry)
2454 const struct mbc_entry *e = entry;
2456 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2459 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2461 static int
2462 mbc_entry_eq (const void *entry1, const void *entry2)
2464 const struct mbc_entry *e1 = entry1;
2465 const struct mbc_entry *e2 = entry2;
2467 return (e1->mode == e2->mode
2468 && e1->cst == e2->cst);
2471 /* Returns cost of multiplication by constant CST in MODE. */
2473 static unsigned
2474 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2476 static htab_t costs;
2477 struct mbc_entry **cached, act;
2478 rtx seq;
2479 unsigned cost;
2481 if (!costs)
2482 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2484 act.mode = mode;
2485 act.cst = cst;
2486 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2487 if (*cached)
2488 return (*cached)->cost;
2490 *cached = xmalloc (sizeof (struct mbc_entry));
2491 (*cached)->mode = mode;
2492 (*cached)->cst = cst;
2494 start_sequence ();
2495 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2496 NULL_RTX, 0);
2497 seq = get_insns ();
2498 end_sequence ();
2500 cost = seq_cost (seq);
2502 if (dump_file && (dump_flags & TDF_DETAILS))
2503 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2504 (int) cst, GET_MODE_NAME (mode), cost);
2506 (*cached)->cost = cost;
2508 return cost;
2511 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2512 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2513 variable is omitted. The created memory accesses MODE.
2515 TODO -- there must be some better way. This all is quite crude. */
2517 static unsigned
2518 get_address_cost (bool symbol_present, bool var_present,
2519 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2521 #define MAX_RATIO 128
2522 static sbitmap valid_mult;
2523 static HOST_WIDE_INT rat, off;
2524 static HOST_WIDE_INT min_offset, max_offset;
2525 static unsigned costs[2][2][2][2];
2526 unsigned cost, acost;
2527 rtx seq, addr, base;
2528 bool offset_p, ratio_p;
2529 rtx reg1;
2530 HOST_WIDE_INT s_offset;
2531 unsigned HOST_WIDE_INT mask;
2532 unsigned bits;
2534 if (!valid_mult)
2536 HOST_WIDE_INT i;
2538 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2540 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2541 for (i = 1; i <= 1 << 20; i <<= 1)
2543 XEXP (addr, 1) = GEN_INT (i);
2544 if (!memory_address_p (Pmode, addr))
2545 break;
2547 max_offset = i >> 1;
2548 off = max_offset;
2550 for (i = 1; i <= 1 << 20; i <<= 1)
2552 XEXP (addr, 1) = GEN_INT (-i);
2553 if (!memory_address_p (Pmode, addr))
2554 break;
2556 min_offset = -(i >> 1);
2558 if (dump_file && (dump_flags & TDF_DETAILS))
2560 fprintf (dump_file, "get_address_cost:\n");
2561 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2562 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2565 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2566 sbitmap_zero (valid_mult);
2567 rat = 1;
2568 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2569 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2571 XEXP (addr, 1) = GEN_INT (i);
2572 if (memory_address_p (Pmode, addr))
2574 SET_BIT (valid_mult, i + MAX_RATIO);
2575 rat = i;
2579 if (dump_file && (dump_flags & TDF_DETAILS))
2581 fprintf (dump_file, " allowed multipliers:");
2582 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2583 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2584 fprintf (dump_file, " %d", (int) i);
2585 fprintf (dump_file, "\n");
2586 fprintf (dump_file, "\n");
2590 bits = GET_MODE_BITSIZE (Pmode);
2591 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2592 offset &= mask;
2593 if ((offset >> (bits - 1) & 1))
2594 offset |= ~mask;
2595 s_offset = offset;
2597 cost = 0;
2598 offset_p = (s_offset != 0
2599 && min_offset <= s_offset && s_offset <= max_offset);
2600 ratio_p = (ratio != 1
2601 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2602 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2604 if (ratio != 1 && !ratio_p)
2605 cost += multiply_by_cost (ratio, Pmode);
2607 if (s_offset && !offset_p && !symbol_present)
2609 cost += add_cost (Pmode);
2610 var_present = true;
2613 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2614 if (!acost)
2616 acost = 0;
2618 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2619 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2620 if (ratio_p)
2621 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2623 if (var_present)
2624 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2626 if (symbol_present)
2628 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2629 if (offset_p)
2630 base = gen_rtx_fmt_e (CONST, Pmode,
2631 gen_rtx_fmt_ee (PLUS, Pmode,
2632 base,
2633 GEN_INT (off)));
2635 else if (offset_p)
2636 base = GEN_INT (off);
2637 else
2638 base = NULL_RTX;
2640 if (base)
2641 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2643 start_sequence ();
2644 addr = memory_address (Pmode, addr);
2645 seq = get_insns ();
2646 end_sequence ();
2648 acost = seq_cost (seq);
2649 acost += address_cost (addr, Pmode);
2651 if (!acost)
2652 acost = 1;
2653 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2656 return cost + acost;
2659 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2660 the bitmap to that we should store it. */
2662 static struct ivopts_data *fd_ivopts_data;
2663 static tree
2664 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2666 bitmap *depends_on = data;
2667 struct version_info *info;
2669 if (TREE_CODE (*expr_p) != SSA_NAME)
2670 return NULL_TREE;
2671 info = name_info (fd_ivopts_data, *expr_p);
2673 if (!info->inv_id || info->has_nonlin_use)
2674 return NULL_TREE;
2676 if (!*depends_on)
2677 *depends_on = BITMAP_XMALLOC ();
2678 bitmap_set_bit (*depends_on, info->inv_id);
2680 return NULL_TREE;
2683 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
2684 invariants the computation depends on. */
2686 static unsigned
2687 force_var_cost (struct ivopts_data *data,
2688 tree expr, bitmap *depends_on)
2690 static bool costs_initialized = false;
2691 static unsigned integer_cost;
2692 static unsigned symbol_cost;
2693 static unsigned address_cost;
2694 tree op0, op1;
2695 unsigned cost0, cost1, cost;
2696 enum machine_mode mode;
2698 if (!costs_initialized)
2700 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2701 rtx x = gen_rtx_MEM (DECL_MODE (var),
2702 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2703 tree addr;
2704 tree type = build_pointer_type (integer_type_node);
2706 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2707 2000));
2709 SET_DECL_RTL (var, x);
2710 TREE_STATIC (var) = 1;
2711 addr = build1 (ADDR_EXPR, type, var);
2712 symbol_cost = computation_cost (addr) + 1;
2714 address_cost
2715 = computation_cost (build2 (PLUS_EXPR, type,
2716 addr,
2717 build_int_cst_type (type, 2000))) + 1;
2718 if (dump_file && (dump_flags & TDF_DETAILS))
2720 fprintf (dump_file, "force_var_cost:\n");
2721 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2722 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2723 fprintf (dump_file, " address %d\n", (int) address_cost);
2724 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2725 fprintf (dump_file, "\n");
2728 costs_initialized = true;
2731 if (depends_on)
2733 fd_ivopts_data = data;
2734 walk_tree (&expr, find_depends, depends_on, NULL);
2737 if (SSA_VAR_P (expr))
2738 return 0;
2740 if (TREE_INVARIANT (expr))
2742 if (TREE_CODE (expr) == INTEGER_CST)
2743 return integer_cost;
2745 if (TREE_CODE (expr) == ADDR_EXPR)
2747 tree obj = TREE_OPERAND (expr, 0);
2749 if (TREE_CODE (obj) == VAR_DECL
2750 || TREE_CODE (obj) == PARM_DECL
2751 || TREE_CODE (obj) == RESULT_DECL)
2752 return symbol_cost;
2755 return address_cost;
2758 switch (TREE_CODE (expr))
2760 case PLUS_EXPR:
2761 case MINUS_EXPR:
2762 case MULT_EXPR:
2763 op0 = TREE_OPERAND (expr, 0);
2764 op1 = TREE_OPERAND (expr, 1);
2766 if (is_gimple_val (op0))
2767 cost0 = 0;
2768 else
2769 cost0 = force_var_cost (data, op0, NULL);
2771 if (is_gimple_val (op1))
2772 cost1 = 0;
2773 else
2774 cost1 = force_var_cost (data, op1, NULL);
2776 break;
2778 default:
2779 /* Just an arbitrary value, FIXME. */
2780 return target_spill_cost;
2783 mode = TYPE_MODE (TREE_TYPE (expr));
2784 switch (TREE_CODE (expr))
2786 case PLUS_EXPR:
2787 case MINUS_EXPR:
2788 cost = add_cost (mode);
2789 break;
2791 case MULT_EXPR:
2792 if (cst_and_fits_in_hwi (op0))
2793 cost = multiply_by_cost (int_cst_value (op0), mode);
2794 else if (cst_and_fits_in_hwi (op1))
2795 cost = multiply_by_cost (int_cst_value (op1), mode);
2796 else
2797 return target_spill_cost;
2798 break;
2800 default:
2801 gcc_unreachable ();
2804 cost += cost0;
2805 cost += cost1;
2807 /* Bound the cost by target_spill_cost. The parts of complicated
2808 computations often are either loop invariant or at least can
2809 be shared between several iv uses, so letting this grow without
2810 limits would not give reasonable results. */
2811 return cost < target_spill_cost ? cost : target_spill_cost;
2814 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2815 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2816 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2817 invariants the computation depends on. */
2819 static unsigned
2820 split_address_cost (struct ivopts_data *data,
2821 tree addr, bool *symbol_present, bool *var_present,
2822 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2824 tree core;
2825 HOST_WIDE_INT bitsize;
2826 HOST_WIDE_INT bitpos;
2827 tree toffset;
2828 enum machine_mode mode;
2829 int unsignedp, volatilep;
2831 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
2832 &unsignedp, &volatilep, false);
2834 if (toffset != 0
2835 || bitpos % BITS_PER_UNIT != 0
2836 || TREE_CODE (core) != VAR_DECL)
2838 *symbol_present = false;
2839 *var_present = true;
2840 fd_ivopts_data = data;
2841 walk_tree (&addr, find_depends, depends_on, NULL);
2842 return target_spill_cost;
2845 *offset += bitpos / BITS_PER_UNIT;
2846 if (TREE_STATIC (core)
2847 || DECL_EXTERNAL (core))
2849 *symbol_present = true;
2850 *var_present = false;
2851 return 0;
2854 *symbol_present = false;
2855 *var_present = true;
2856 return 0;
2859 /* Estimates cost of expressing difference of addresses E1 - E2 as
2860 var + symbol + offset. The value of offset is added to OFFSET,
2861 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2862 part is missing. DEPENDS_ON is a set of the invariants the computation
2863 depends on. */
2865 static unsigned
2866 ptr_difference_cost (struct ivopts_data *data,
2867 tree e1, tree e2, bool *symbol_present, bool *var_present,
2868 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2870 HOST_WIDE_INT diff = 0;
2871 unsigned cost;
2873 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
2875 if (ptr_difference_const (e1, e2, &diff))
2877 *offset += diff;
2878 *symbol_present = false;
2879 *var_present = false;
2880 return 0;
2883 if (e2 == integer_zero_node)
2884 return split_address_cost (data, TREE_OPERAND (e1, 0),
2885 symbol_present, var_present, offset, depends_on);
2887 *symbol_present = false;
2888 *var_present = true;
2890 cost = force_var_cost (data, e1, depends_on);
2891 cost += force_var_cost (data, e2, depends_on);
2892 cost += add_cost (Pmode);
2894 return cost;
2897 /* Estimates cost of expressing difference E1 - E2 as
2898 var + symbol + offset. The value of offset is added to OFFSET,
2899 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2900 part is missing. DEPENDS_ON is a set of the invariants the computation
2901 depends on. */
2903 static unsigned
2904 difference_cost (struct ivopts_data *data,
2905 tree e1, tree e2, bool *symbol_present, bool *var_present,
2906 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2908 unsigned cost;
2909 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
2911 strip_offset (&e1, offset);
2912 *offset = -*offset;
2913 strip_offset (&e2, offset);
2914 *offset = -*offset;
2916 if (TREE_CODE (e1) == ADDR_EXPR)
2917 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
2918 depends_on);
2919 *symbol_present = false;
2921 if (operand_equal_p (e1, e2, 0))
2923 *var_present = false;
2924 return 0;
2926 *var_present = true;
2927 if (zero_p (e2))
2928 return force_var_cost (data, e1, depends_on);
2930 if (zero_p (e1))
2932 cost = force_var_cost (data, e2, depends_on);
2933 cost += multiply_by_cost (-1, mode);
2935 return cost;
2938 cost = force_var_cost (data, e1, depends_on);
2939 cost += force_var_cost (data, e2, depends_on);
2940 cost += add_cost (mode);
2942 return cost;
2945 /* Determines the cost of the computation by that USE is expressed
2946 from induction variable CAND. If ADDRESS_P is true, we just need
2947 to create an address from it, otherwise we want to get it into
2948 register. A set of invariants we depend on is stored in
2949 DEPENDS_ON. AT is the statement at that the value is computed. */
2951 static unsigned
2952 get_computation_cost_at (struct ivopts_data *data,
2953 struct iv_use *use, struct iv_cand *cand,
2954 bool address_p, bitmap *depends_on, tree at)
2956 tree ubase = use->iv->base, ustep = use->iv->step;
2957 tree cbase, cstep;
2958 tree utype = TREE_TYPE (ubase), ctype;
2959 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
2960 HOST_WIDE_INT ratio, aratio;
2961 bool var_present, symbol_present;
2962 unsigned cost = 0, n_sums;
2964 *depends_on = NULL;
2966 /* Only consider real candidates. */
2967 if (!cand->iv)
2968 return INFTY;
2970 cbase = cand->iv->base;
2971 cstep = cand->iv->step;
2972 ctype = TREE_TYPE (cbase);
2974 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2976 /* We do not have a precision to express the values of use. */
2977 return INFTY;
2980 if (address_p)
2982 /* Do not try to express address of an object with computation based
2983 on address of a different object. This may cause problems in rtl
2984 level alias analysis (that does not expect this to be happening,
2985 as this is illegal in C), and would be unlikely to be useful
2986 anyway. */
2987 if (use->iv->base_object
2988 && cand->iv->base_object
2989 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
2990 return INFTY;
2993 if (!cst_and_fits_in_hwi (ustep)
2994 || !cst_and_fits_in_hwi (cstep))
2995 return INFTY;
2997 if (TREE_CODE (ubase) == INTEGER_CST
2998 && !cst_and_fits_in_hwi (ubase))
2999 goto fallback;
3001 if (TREE_CODE (cbase) == INTEGER_CST
3002 && !cst_and_fits_in_hwi (cbase))
3003 goto fallback;
3005 ustepi = int_cst_value (ustep);
3006 cstepi = int_cst_value (cstep);
3008 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3010 /* TODO -- add direct handling of this case. */
3011 goto fallback;
3014 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3015 return INFTY;
3017 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3018 or ratio == 1, it is better to handle this like
3020 ubase - ratio * cbase + ratio * var
3022 (also holds in the case ratio == -1, TODO. */
3024 if (TREE_CODE (cbase) == INTEGER_CST)
3026 offset = - ratio * int_cst_value (cbase);
3027 cost += difference_cost (data,
3028 ubase, integer_zero_node,
3029 &symbol_present, &var_present, &offset,
3030 depends_on);
3032 else if (ratio == 1)
3034 cost += difference_cost (data,
3035 ubase, cbase,
3036 &symbol_present, &var_present, &offset,
3037 depends_on);
3039 else
3041 cost += force_var_cost (data, cbase, depends_on);
3042 cost += add_cost (TYPE_MODE (ctype));
3043 cost += difference_cost (data,
3044 ubase, integer_zero_node,
3045 &symbol_present, &var_present, &offset,
3046 depends_on);
3049 /* If we are after the increment, the value of the candidate is higher by
3050 one iteration. */
3051 if (stmt_after_increment (data->current_loop, cand, at))
3052 offset -= ratio * cstepi;
3054 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3055 (symbol/var/const parts may be omitted). If we are looking for an address,
3056 find the cost of addressing this. */
3057 if (address_p)
3058 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3060 /* Otherwise estimate the costs for computing the expression. */
3061 aratio = ratio > 0 ? ratio : -ratio;
3062 if (!symbol_present && !var_present && !offset)
3064 if (ratio != 1)
3065 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3067 return cost;
3070 if (aratio != 1)
3071 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3073 n_sums = 1;
3074 if (var_present
3075 /* Symbol + offset should be compile-time computable. */
3076 && (symbol_present || offset))
3077 n_sums++;
3079 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3081 fallback:
3083 /* Just get the expression, expand it and measure the cost. */
3084 tree comp = get_computation_at (data->current_loop, use, cand, at);
3086 if (!comp)
3087 return INFTY;
3089 if (address_p)
3090 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3092 return computation_cost (comp);
3096 /* Determines the cost of the computation by that USE is expressed
3097 from induction variable CAND. If ADDRESS_P is true, we just need
3098 to create an address from it, otherwise we want to get it into
3099 register. A set of invariants we depend on is stored in
3100 DEPENDS_ON. */
3102 static unsigned
3103 get_computation_cost (struct ivopts_data *data,
3104 struct iv_use *use, struct iv_cand *cand,
3105 bool address_p, bitmap *depends_on)
3107 return get_computation_cost_at (data,
3108 use, cand, address_p, depends_on, use->stmt);
3111 /* Determines cost of basing replacement of USE on CAND in a generic
3112 expression. */
3114 static bool
3115 determine_use_iv_cost_generic (struct ivopts_data *data,
3116 struct iv_use *use, struct iv_cand *cand)
3118 bitmap depends_on;
3119 unsigned cost = get_computation_cost (data, use, cand, false, &depends_on);
3121 set_use_iv_cost (data, use, cand, cost, depends_on);
3123 return cost != INFTY;
3126 /* Determines cost of basing replacement of USE on CAND in an address. */
3128 static bool
3129 determine_use_iv_cost_address (struct ivopts_data *data,
3130 struct iv_use *use, struct iv_cand *cand)
3132 bitmap depends_on;
3133 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3135 set_use_iv_cost (data, use, cand, cost, depends_on);
3137 return cost != INFTY;
3140 /* Computes value of induction variable IV in iteration NITER. */
3142 static tree
3143 iv_value (struct iv *iv, tree niter)
3145 tree val;
3146 tree type = TREE_TYPE (iv->base);
3148 niter = fold_convert (type, niter);
3149 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3151 return fold (build2 (PLUS_EXPR, type, iv->base, val));
3154 /* Computes value of candidate CAND at position AT in iteration NITER. */
3156 static tree
3157 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3159 tree val = iv_value (cand->iv, niter);
3160 tree type = TREE_TYPE (cand->iv->base);
3162 if (stmt_after_increment (loop, cand, at))
3163 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3165 return val;
3168 /* Check whether it is possible to express the condition in USE by comparison
3169 of candidate CAND. If so, store the comparison code to COMPARE and the
3170 value compared with to BOUND. */
3172 static bool
3173 may_eliminate_iv (struct loop *loop,
3174 struct iv_use *use, struct iv_cand *cand,
3175 enum tree_code *compare, tree *bound)
3177 basic_block ex_bb;
3178 edge exit;
3179 struct tree_niter_desc niter, new_niter;
3180 tree wider_type, type, base;
3182 /* For now works only for exits that dominate the loop latch. TODO -- extend
3183 for other conditions inside loop body. */
3184 ex_bb = bb_for_stmt (use->stmt);
3185 if (use->stmt != last_stmt (ex_bb)
3186 || TREE_CODE (use->stmt) != COND_EXPR)
3187 return false;
3188 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3189 return false;
3191 exit = EDGE_SUCC (ex_bb, 0);
3192 if (flow_bb_inside_loop_p (loop, exit->dest))
3193 exit = EDGE_SUCC (ex_bb, 1);
3194 if (flow_bb_inside_loop_p (loop, exit->dest))
3195 return false;
3197 niter.niter = NULL_TREE;
3198 number_of_iterations_exit (loop, exit, &niter);
3199 if (!niter.niter
3200 || !integer_nonzerop (niter.assumptions)
3201 || !integer_zerop (niter.may_be_zero))
3202 return false;
3204 if (exit->flags & EDGE_TRUE_VALUE)
3205 *compare = EQ_EXPR;
3206 else
3207 *compare = NE_EXPR;
3209 *bound = cand_value_at (loop, cand, use->stmt, niter.niter);
3211 /* Let us check there is not some problem with overflows, by checking that
3212 the number of iterations is unchanged. */
3213 base = cand->iv->base;
3214 type = TREE_TYPE (base);
3215 if (stmt_after_increment (loop, cand, use->stmt))
3216 base = fold (build2 (PLUS_EXPR, type, base, cand->iv->step));
3218 new_niter.niter = NULL_TREE;
3219 number_of_iterations_cond (TREE_TYPE (cand->iv->base), base,
3220 cand->iv->step, NE_EXPR, *bound, NULL_TREE,
3221 &new_niter);
3222 if (!new_niter.niter
3223 || !integer_nonzerop (new_niter.assumptions)
3224 || !integer_zerop (new_niter.may_be_zero))
3225 return false;
3227 wider_type = TREE_TYPE (new_niter.niter);
3228 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (TREE_TYPE (niter.niter)))
3229 wider_type = TREE_TYPE (niter.niter);
3230 if (!operand_equal_p (fold_convert (wider_type, niter.niter),
3231 fold_convert (wider_type, new_niter.niter), 0))
3232 return false;
3234 return true;
3237 /* Determines cost of basing replacement of USE on CAND in a condition. */
3239 static bool
3240 determine_use_iv_cost_condition (struct ivopts_data *data,
3241 struct iv_use *use, struct iv_cand *cand)
3243 tree bound;
3244 enum tree_code compare;
3246 /* Only consider real candidates. */
3247 if (!cand->iv)
3249 set_use_iv_cost (data, use, cand, INFTY, NULL);
3250 return false;
3253 if (may_eliminate_iv (data->current_loop, use, cand, &compare, &bound))
3255 bitmap depends_on = NULL;
3256 unsigned cost = force_var_cost (data, bound, &depends_on);
3258 set_use_iv_cost (data, use, cand, cost, depends_on);
3259 return cost != INFTY;
3262 /* The induction variable elimination failed; just express the original
3263 giv. If it is compared with an invariant, note that we cannot get
3264 rid of it. */
3265 if (TREE_CODE (*use->op_p) == SSA_NAME)
3266 record_invariant (data, *use->op_p, true);
3267 else
3269 record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
3270 record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
3273 return determine_use_iv_cost_generic (data, use, cand);
3276 /* Checks whether it is possible to replace the final value of USE by
3277 a direct computation. If so, the formula is stored to *VALUE. */
3279 static bool
3280 may_replace_final_value (struct loop *loop, struct iv_use *use, tree *value)
3282 edge exit;
3283 struct tree_niter_desc *niter;
3285 exit = single_dom_exit (loop);
3286 if (!exit)
3287 return false;
3289 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3290 bb_for_stmt (use->stmt)));
3292 niter = &loop_data (loop)->niter;
3293 if (!niter->niter
3294 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
3295 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
3296 return false;
3298 *value = iv_value (use->iv, niter->niter);
3300 return true;
3303 /* Determines cost of replacing final value of USE using CAND. */
3305 static bool
3306 determine_use_iv_cost_outer (struct ivopts_data *data,
3307 struct iv_use *use, struct iv_cand *cand)
3309 bitmap depends_on;
3310 unsigned cost;
3311 edge exit;
3312 tree value;
3313 struct loop *loop = data->current_loop;
3315 if (!cand->iv)
3317 if (!may_replace_final_value (loop, use, &value))
3319 set_use_iv_cost (data, use, cand, INFTY, NULL);
3320 return false;
3323 depends_on = NULL;
3324 cost = force_var_cost (data, value, &depends_on);
3326 cost /= AVG_LOOP_NITER (loop);
3328 set_use_iv_cost (data, use, cand, cost, depends_on);
3329 return cost != INFTY;
3332 exit = single_dom_exit (loop);
3333 if (exit)
3335 /* If there is just a single exit, we may use value of the candidate
3336 after we take it to determine the value of use. */
3337 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3338 last_stmt (exit->src));
3339 if (cost != INFTY)
3340 cost /= AVG_LOOP_NITER (loop);
3342 else
3344 /* Otherwise we just need to compute the iv. */
3345 cost = get_computation_cost (data, use, cand, false, &depends_on);
3348 set_use_iv_cost (data, use, cand, cost, depends_on);
3350 return cost != INFTY;
3353 /* Determines cost of basing replacement of USE on CAND. Returns false
3354 if USE cannot be based on CAND. */
3356 static bool
3357 determine_use_iv_cost (struct ivopts_data *data,
3358 struct iv_use *use, struct iv_cand *cand)
3360 switch (use->type)
3362 case USE_NONLINEAR_EXPR:
3363 return determine_use_iv_cost_generic (data, use, cand);
3365 case USE_OUTER:
3366 return determine_use_iv_cost_outer (data, use, cand);
3368 case USE_ADDRESS:
3369 return determine_use_iv_cost_address (data, use, cand);
3371 case USE_COMPARE:
3372 return determine_use_iv_cost_condition (data, use, cand);
3374 default:
3375 gcc_unreachable ();
3379 /* Determines costs of basing the use of the iv on an iv candidate. */
3381 static void
3382 determine_use_iv_costs (struct ivopts_data *data)
3384 unsigned i, j;
3385 struct iv_use *use;
3386 struct iv_cand *cand;
3387 bitmap to_clear = BITMAP_XMALLOC ();
3389 alloc_use_cost_map (data);
3391 for (i = 0; i < n_iv_uses (data); i++)
3393 use = iv_use (data, i);
3395 if (data->consider_all_candidates)
3397 for (j = 0; j < n_iv_cands (data); j++)
3399 cand = iv_cand (data, j);
3400 determine_use_iv_cost (data, use, cand);
3403 else
3405 bitmap_iterator bi;
3407 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3409 cand = iv_cand (data, j);
3410 if (!determine_use_iv_cost (data, use, cand))
3411 bitmap_set_bit (to_clear, j);
3414 /* Remove the candidates for that the cost is infinite from
3415 the list of related candidates. */
3416 bitmap_and_compl_into (use->related_cands, to_clear);
3417 bitmap_clear (to_clear);
3421 BITMAP_XFREE (to_clear);
3423 if (dump_file && (dump_flags & TDF_DETAILS))
3425 fprintf (dump_file, "Use-candidate costs:\n");
3427 for (i = 0; i < n_iv_uses (data); i++)
3429 use = iv_use (data, i);
3431 fprintf (dump_file, "Use %d:\n", i);
3432 fprintf (dump_file, " cand\tcost\tdepends on\n");
3433 for (j = 0; j < use->n_map_members; j++)
3435 if (!use->cost_map[j].cand
3436 || use->cost_map[j].cost == INFTY)
3437 continue;
3439 fprintf (dump_file, " %d\t%d\t",
3440 use->cost_map[j].cand->id,
3441 use->cost_map[j].cost);
3442 if (use->cost_map[j].depends_on)
3443 bitmap_print (dump_file,
3444 use->cost_map[j].depends_on, "","");
3445 fprintf (dump_file, "\n");
3448 fprintf (dump_file, "\n");
3450 fprintf (dump_file, "\n");
3454 /* Determines cost of the candidate CAND. */
3456 static void
3457 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3459 unsigned cost_base, cost_step;
3460 tree base, last;
3461 basic_block bb;
3463 if (!cand->iv)
3465 cand->cost = 0;
3466 return;
3469 /* There are two costs associated with the candidate -- its increment
3470 and its initialization. The second is almost negligible for any loop
3471 that rolls enough, so we take it just very little into account. */
3473 base = cand->iv->base;
3474 cost_base = force_var_cost (data, base, NULL);
3475 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3477 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3479 /* Prefer the original iv unless we may gain something by replacing it. */
3480 if (cand->pos == IP_ORIGINAL)
3481 cand->cost--;
3483 /* Prefer not to insert statements into latch unless there are some
3484 already (so that we do not create unnecessary jumps). */
3485 if (cand->pos == IP_END)
3487 bb = ip_end_pos (data->current_loop);
3488 last = last_stmt (bb);
3490 if (!last
3491 || TREE_CODE (last) == LABEL_EXPR)
3492 cand->cost++;
3496 /* Determines costs of computation of the candidates. */
3498 static void
3499 determine_iv_costs (struct ivopts_data *data)
3501 unsigned i;
3503 if (dump_file && (dump_flags & TDF_DETAILS))
3505 fprintf (dump_file, "Candidate costs:\n");
3506 fprintf (dump_file, " cand\tcost\n");
3509 for (i = 0; i < n_iv_cands (data); i++)
3511 struct iv_cand *cand = iv_cand (data, i);
3513 determine_iv_cost (data, cand);
3515 if (dump_file && (dump_flags & TDF_DETAILS))
3516 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3519 if (dump_file && (dump_flags & TDF_DETAILS))
3520 fprintf (dump_file, "\n");
3523 /* Calculates cost for having SIZE induction variables. */
3525 static unsigned
3526 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3528 return global_cost_for_size (size,
3529 loop_data (data->current_loop)->regs_used,
3530 n_iv_uses (data));
3533 /* For each size of the induction variable set determine the penalty. */
3535 static void
3536 determine_set_costs (struct ivopts_data *data)
3538 unsigned j, n;
3539 tree phi, op;
3540 struct loop *loop = data->current_loop;
3541 bitmap_iterator bi;
3543 /* We use the following model (definitely improvable, especially the
3544 cost function -- TODO):
3546 We estimate the number of registers available (using MD data), name it A.
3548 We estimate the number of registers used by the loop, name it U. This
3549 number is obtained as the number of loop phi nodes (not counting virtual
3550 registers and bivs) + the number of variables from outside of the loop.
3552 We set a reserve R (free regs that are used for temporary computations,
3553 etc.). For now the reserve is a constant 3.
3555 Let I be the number of induction variables.
3557 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3558 make a lot of ivs without a reason).
3559 -- if A - R < U + I <= A, the cost is I * PRES_COST
3560 -- if U + I > A, the cost is I * PRES_COST and
3561 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3563 if (dump_file && (dump_flags & TDF_DETAILS))
3565 fprintf (dump_file, "Global costs:\n");
3566 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3567 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3568 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3569 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3572 n = 0;
3573 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3575 op = PHI_RESULT (phi);
3577 if (!is_gimple_reg (op))
3578 continue;
3580 if (get_iv (data, op))
3581 continue;
3583 n++;
3586 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3588 struct version_info *info = ver_info (data, j);
3590 if (info->inv_id && info->has_nonlin_use)
3591 n++;
3594 loop_data (loop)->regs_used = n;
3595 if (dump_file && (dump_flags & TDF_DETAILS))
3596 fprintf (dump_file, " regs_used %d\n", n);
3598 if (dump_file && (dump_flags & TDF_DETAILS))
3600 fprintf (dump_file, " cost for size:\n");
3601 fprintf (dump_file, " ivs\tcost\n");
3602 for (j = 0; j <= 2 * target_avail_regs; j++)
3603 fprintf (dump_file, " %d\t%d\n", j,
3604 ivopts_global_cost_for_size (data, j));
3605 fprintf (dump_file, "\n");
3609 /* Returns true if A is a cheaper cost pair than B. */
3611 static bool
3612 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3614 if (!a)
3615 return false;
3617 if (!b)
3618 return true;
3620 if (a->cost < b->cost)
3621 return true;
3623 if (a->cost > b->cost)
3624 return false;
3626 /* In case the costs are the same, prefer the cheaper candidate. */
3627 if (a->cand->cost < b->cand->cost)
3628 return true;
3630 return false;
3633 /* Computes the cost field of IVS structure. */
3635 static void
3636 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
3638 unsigned cost = 0;
3640 cost += ivs->cand_use_cost;
3641 cost += ivs->cand_cost;
3642 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
3644 ivs->cost = cost;
3647 /* Set USE not to be expressed by any candidate in IVS. */
3649 static void
3650 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
3651 struct iv_use *use)
3653 unsigned uid = use->id, cid, iid;
3654 bitmap deps;
3655 struct cost_pair *cp;
3656 bitmap_iterator bi;
3658 cp = ivs->cand_for_use[uid];
3659 if (!cp)
3660 return;
3661 cid = cp->cand->id;
3663 ivs->bad_uses++;
3664 ivs->cand_for_use[uid] = NULL;
3665 ivs->n_cand_uses[cid]--;
3667 if (ivs->n_cand_uses[cid] == 0)
3669 bitmap_clear_bit (ivs->cands, cid);
3670 /* Do not count the pseudocandidates. */
3671 if (cp->cand->iv)
3672 ivs->n_regs--;
3673 ivs->n_cands--;
3674 ivs->cand_cost -= cp->cand->cost;
3677 ivs->cand_use_cost -= cp->cost;
3679 deps = cp->depends_on;
3681 if (deps)
3683 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3685 ivs->n_invariant_uses[iid]--;
3686 if (ivs->n_invariant_uses[iid] == 0)
3687 ivs->n_regs--;
3691 iv_ca_recount_cost (data, ivs);
3694 /* Set cost pair for USE in set IVS to CP. */
3696 static void
3697 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
3698 struct iv_use *use, struct cost_pair *cp)
3700 unsigned uid = use->id, cid, iid;
3701 bitmap deps;
3702 bitmap_iterator bi;
3704 if (ivs->cand_for_use[uid] == cp)
3705 return;
3707 if (ivs->cand_for_use[uid])
3708 iv_ca_set_no_cp (data, ivs, use);
3710 if (cp)
3712 cid = cp->cand->id;
3714 ivs->bad_uses--;
3715 ivs->cand_for_use[uid] = cp;
3716 ivs->n_cand_uses[cid]++;
3717 if (ivs->n_cand_uses[cid] == 1)
3719 bitmap_set_bit (ivs->cands, cid);
3720 /* Do not count the pseudocandidates. */
3721 if (cp->cand->iv)
3722 ivs->n_regs++;
3723 ivs->n_cands++;
3724 ivs->cand_cost += cp->cand->cost;
3727 ivs->cand_use_cost += cp->cost;
3729 deps = cp->depends_on;
3731 if (deps)
3733 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3735 ivs->n_invariant_uses[iid]++;
3736 if (ivs->n_invariant_uses[iid] == 1)
3737 ivs->n_regs++;
3741 iv_ca_recount_cost (data, ivs);
3745 /* Extend set IVS by expressing USE by some of the candidates in it
3746 if possible. */
3748 static void
3749 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
3750 struct iv_use *use)
3752 struct cost_pair *best_cp = NULL, *cp;
3753 bitmap_iterator bi;
3754 unsigned i;
3756 gcc_assert (ivs->upto >= use->id);
3758 if (ivs->upto == use->id)
3760 ivs->upto++;
3761 ivs->bad_uses++;
3764 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
3766 cp = get_use_iv_cost (data, use, iv_cand (data, i));
3768 if (cheaper_cost_pair (cp, best_cp))
3769 best_cp = cp;
3772 iv_ca_set_cp (data, ivs, use, best_cp);
3775 /* Get cost for assignment IVS. */
3777 static unsigned
3778 iv_ca_cost (struct iv_ca *ivs)
3780 return (ivs->bad_uses ? INFTY : ivs->cost);
3783 /* Returns true if all dependences of CP are among invariants in IVS. */
3785 static bool
3786 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
3788 unsigned i;
3789 bitmap_iterator bi;
3791 if (!cp->depends_on)
3792 return true;
3794 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
3796 if (ivs->n_invariant_uses[i] == 0)
3797 return false;
3800 return true;
3803 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
3804 it before NEXT_CHANGE. */
3806 static struct iv_ca_delta *
3807 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
3808 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
3810 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
3812 change->use = use;
3813 change->old_cp = old_cp;
3814 change->new_cp = new_cp;
3815 change->next_change = next_change;
3817 return change;
3820 /* Joins two lists of changes L1 and L2. Destructive -- old lists
3821 are rewritten. */
3823 static struct iv_ca_delta *
3824 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
3826 struct iv_ca_delta *last;
3828 if (!l2)
3829 return l1;
3831 if (!l1)
3832 return l2;
3834 for (last = l1; last->next_change; last = last->next_change)
3835 continue;
3836 last->next_change = l2;
3838 return l1;
3841 /* Returns candidate by that USE is expressed in IVS. */
3843 static struct cost_pair *
3844 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
3846 return ivs->cand_for_use[use->id];
3849 /* Reverse the list of changes DELTA, forming the inverse to it. */
3851 static struct iv_ca_delta *
3852 iv_ca_delta_reverse (struct iv_ca_delta *delta)
3854 struct iv_ca_delta *act, *next, *prev = NULL;
3855 struct cost_pair *tmp;
3857 for (act = delta; act; act = next)
3859 next = act->next_change;
3860 act->next_change = prev;
3861 prev = act;
3863 tmp = act->old_cp;
3864 act->old_cp = act->new_cp;
3865 act->new_cp = tmp;
3868 return prev;
3871 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
3872 reverted instead. */
3874 static void
3875 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
3876 struct iv_ca_delta *delta, bool forward)
3878 struct cost_pair *from, *to;
3879 struct iv_ca_delta *act;
3881 if (!forward)
3882 delta = iv_ca_delta_reverse (delta);
3884 for (act = delta; act; act = act->next_change)
3886 from = act->old_cp;
3887 to = act->new_cp;
3888 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
3889 iv_ca_set_cp (data, ivs, act->use, to);
3892 if (!forward)
3893 iv_ca_delta_reverse (delta);
3896 /* Returns true if CAND is used in IVS. */
3898 static bool
3899 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
3901 return ivs->n_cand_uses[cand->id] > 0;
3904 /* Returns number of induction variable candidates in the set IVS. */
3906 static unsigned
3907 iv_ca_n_cands (struct iv_ca *ivs)
3909 return ivs->n_cands;
3912 /* Free the list of changes DELTA. */
3914 static void
3915 iv_ca_delta_free (struct iv_ca_delta **delta)
3917 struct iv_ca_delta *act, *next;
3919 for (act = *delta; act; act = next)
3921 next = act->next_change;
3922 free (act);
3925 *delta = NULL;
3928 /* Allocates new iv candidates assignment. */
3930 static struct iv_ca *
3931 iv_ca_new (struct ivopts_data *data)
3933 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
3935 nw->upto = 0;
3936 nw->bad_uses = 0;
3937 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
3938 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
3939 nw->cands = BITMAP_XMALLOC ();
3940 nw->n_cands = 0;
3941 nw->n_regs = 0;
3942 nw->cand_use_cost = 0;
3943 nw->cand_cost = 0;
3944 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
3945 nw->cost = 0;
3947 return nw;
3950 /* Free memory occupied by the set IVS. */
3952 static void
3953 iv_ca_free (struct iv_ca **ivs)
3955 free ((*ivs)->cand_for_use);
3956 free ((*ivs)->n_cand_uses);
3957 BITMAP_XFREE ((*ivs)->cands);
3958 free ((*ivs)->n_invariant_uses);
3959 free (*ivs);
3960 *ivs = NULL;
3963 /* Dumps IVS to FILE. */
3965 static void
3966 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
3968 const char *pref = " invariants ";
3969 unsigned i;
3971 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
3972 bitmap_print (file, ivs->cands, " candidates ","\n");
3974 for (i = 1; i <= data->max_inv_id; i++)
3975 if (ivs->n_invariant_uses[i])
3977 fprintf (file, "%s%d", pref, i);
3978 pref = ", ";
3980 fprintf (file, "\n");
3983 /* Try changing candidate in IVS to CAND for each use. Return cost of the
3984 new set, and store differences in DELTA. Number of induction variables
3985 in the new set is stored to N_IVS. */
3987 static unsigned
3988 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
3989 struct iv_cand *cand, struct iv_ca_delta **delta,
3990 unsigned *n_ivs)
3992 unsigned i, cost;
3993 struct iv_use *use;
3994 struct cost_pair *old_cp, *new_cp;
3996 *delta = NULL;
3997 for (i = 0; i < ivs->upto; i++)
3999 use = iv_use (data, i);
4000 old_cp = iv_ca_cand_for_use (ivs, use);
4002 if (old_cp
4003 && old_cp->cand == cand)
4004 continue;
4006 new_cp = get_use_iv_cost (data, use, cand);
4007 if (!new_cp)
4008 continue;
4010 if (!iv_ca_has_deps (ivs, new_cp))
4011 continue;
4013 if (!cheaper_cost_pair (new_cp, old_cp))
4014 continue;
4016 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4019 iv_ca_delta_commit (data, ivs, *delta, true);
4020 cost = iv_ca_cost (ivs);
4021 if (n_ivs)
4022 *n_ivs = iv_ca_n_cands (ivs);
4023 iv_ca_delta_commit (data, ivs, *delta, false);
4025 return cost;
4028 /* Try narrowing set IVS by removing CAND. Return the cost of
4029 the new set and store the differences in DELTA. */
4031 static unsigned
4032 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4033 struct iv_cand *cand, struct iv_ca_delta **delta)
4035 unsigned i, ci;
4036 struct iv_use *use;
4037 struct cost_pair *old_cp, *new_cp, *cp;
4038 bitmap_iterator bi;
4039 struct iv_cand *cnd;
4040 unsigned cost;
4042 *delta = NULL;
4043 for (i = 0; i < n_iv_uses (data); i++)
4045 use = iv_use (data, i);
4047 old_cp = iv_ca_cand_for_use (ivs, use);
4048 if (old_cp->cand != cand)
4049 continue;
4051 new_cp = NULL;
4053 if (data->consider_all_candidates)
4055 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4057 if (ci == cand->id)
4058 continue;
4060 cnd = iv_cand (data, ci);
4062 cp = get_use_iv_cost (data, use, cnd);
4063 if (!cp)
4064 continue;
4065 if (!iv_ca_has_deps (ivs, cp))
4066 continue;
4068 if (!cheaper_cost_pair (cp, new_cp))
4069 continue;
4071 new_cp = cp;
4074 else
4076 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4078 if (ci == cand->id)
4079 continue;
4081 cnd = iv_cand (data, ci);
4083 cp = get_use_iv_cost (data, use, cnd);
4084 if (!cp)
4085 continue;
4086 if (!iv_ca_has_deps (ivs, cp))
4087 continue;
4089 if (!cheaper_cost_pair (cp, new_cp))
4090 continue;
4092 new_cp = cp;
4096 if (!new_cp)
4098 iv_ca_delta_free (delta);
4099 return INFTY;
4102 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4105 iv_ca_delta_commit (data, ivs, *delta, true);
4106 cost = iv_ca_cost (ivs);
4107 iv_ca_delta_commit (data, ivs, *delta, false);
4109 return cost;
4112 /* Try optimizing the set of candidates IVS by removing candidates different
4113 from to EXCEPT_CAND from it. Return cost of the new set, and store
4114 differences in DELTA. */
4116 static unsigned
4117 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4118 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4120 bitmap_iterator bi;
4121 struct iv_ca_delta *act_delta, *best_delta;
4122 unsigned i, best_cost, acost;
4123 struct iv_cand *cand;
4125 best_delta = NULL;
4126 best_cost = iv_ca_cost (ivs);
4128 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4130 cand = iv_cand (data, i);
4132 if (cand == except_cand)
4133 continue;
4135 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4137 if (acost < best_cost)
4139 best_cost = acost;
4140 iv_ca_delta_free (&best_delta);
4141 best_delta = act_delta;
4143 else
4144 iv_ca_delta_free (&act_delta);
4147 if (!best_delta)
4149 *delta = NULL;
4150 return best_cost;
4153 /* Recurse to possibly remove other unnecessary ivs. */
4154 iv_ca_delta_commit (data, ivs, best_delta, true);
4155 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4156 iv_ca_delta_commit (data, ivs, best_delta, false);
4157 *delta = iv_ca_delta_join (best_delta, *delta);
4158 return best_cost;
4161 /* Tries to extend the sets IVS in the best possible way in order
4162 to express the USE. */
4164 static bool
4165 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4166 struct iv_use *use)
4168 unsigned best_cost, act_cost;
4169 unsigned i;
4170 bitmap_iterator bi;
4171 struct iv_cand *cand;
4172 struct iv_ca_delta *best_delta = NULL, *act_delta;
4173 struct cost_pair *cp;
4175 iv_ca_add_use (data, ivs, use);
4176 best_cost = iv_ca_cost (ivs);
4178 cp = iv_ca_cand_for_use (ivs, use);
4179 if (cp)
4181 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4182 iv_ca_set_no_cp (data, ivs, use);
4185 /* First try important candidates. Only if it fails, try the specific ones.
4186 Rationale -- in loops with many variables the best choice often is to use
4187 just one generic biv. If we added here many ivs specific to the uses,
4188 the optimization algorithm later would be likely to get stuck in a local
4189 minimum, thus causing us to create too many ivs. The approach from
4190 few ivs to more seems more likely to be successful -- starting from few
4191 ivs, replacing an expensive use by a specific iv should always be a
4192 win. */
4193 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4195 cand = iv_cand (data, i);
4197 if (iv_ca_cand_used_p (ivs, cand))
4198 continue;
4200 cp = get_use_iv_cost (data, use, cand);
4201 if (!cp)
4202 continue;
4204 iv_ca_set_cp (data, ivs, use, cp);
4205 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4206 iv_ca_set_no_cp (data, ivs, use);
4207 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4209 if (act_cost < best_cost)
4211 best_cost = act_cost;
4213 iv_ca_delta_free (&best_delta);
4214 best_delta = act_delta;
4216 else
4217 iv_ca_delta_free (&act_delta);
4220 if (best_cost == INFTY)
4222 for (i = 0; i < use->n_map_members; i++)
4224 cp = use->cost_map + i;
4225 cand = cp->cand;
4226 if (!cand)
4227 continue;
4229 /* Already tried this. */
4230 if (cand->important)
4231 continue;
4233 if (iv_ca_cand_used_p (ivs, cand))
4234 continue;
4236 act_delta = NULL;
4237 iv_ca_set_cp (data, ivs, use, cp);
4238 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4239 iv_ca_set_no_cp (data, ivs, use);
4240 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4241 cp, act_delta);
4243 if (act_cost < best_cost)
4245 best_cost = act_cost;
4247 if (best_delta)
4248 iv_ca_delta_free (&best_delta);
4249 best_delta = act_delta;
4251 else
4252 iv_ca_delta_free (&act_delta);
4256 iv_ca_delta_commit (data, ivs, best_delta, true);
4257 iv_ca_delta_free (&best_delta);
4259 return (best_cost != INFTY);
4262 /* Finds an initial assignment of candidates to uses. */
4264 static struct iv_ca *
4265 get_initial_solution (struct ivopts_data *data)
4267 struct iv_ca *ivs = iv_ca_new (data);
4268 unsigned i;
4270 for (i = 0; i < n_iv_uses (data); i++)
4271 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4273 iv_ca_free (&ivs);
4274 return NULL;
4277 return ivs;
4280 /* Tries to improve set of induction variables IVS. */
4282 static bool
4283 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4285 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4286 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4287 struct iv_cand *cand;
4289 /* Try extending the set of induction variables by one. */
4290 for (i = 0; i < n_iv_cands (data); i++)
4292 cand = iv_cand (data, i);
4294 if (iv_ca_cand_used_p (ivs, cand))
4295 continue;
4297 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4298 if (!act_delta)
4299 continue;
4301 /* If we successfully added the candidate and the set is small enough,
4302 try optimizing it by removing other candidates. */
4303 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4305 iv_ca_delta_commit (data, ivs, act_delta, true);
4306 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4307 iv_ca_delta_commit (data, ivs, act_delta, false);
4308 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4311 if (acost < best_cost)
4313 best_cost = acost;
4314 iv_ca_delta_free (&best_delta);
4315 best_delta = act_delta;
4317 else
4318 iv_ca_delta_free (&act_delta);
4321 if (!best_delta)
4323 /* Try removing the candidates from the set instead. */
4324 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4326 /* Nothing more we can do. */
4327 if (!best_delta)
4328 return false;
4331 iv_ca_delta_commit (data, ivs, best_delta, true);
4332 gcc_assert (best_cost == iv_ca_cost (ivs));
4333 iv_ca_delta_free (&best_delta);
4334 return true;
4337 /* Attempts to find the optimal set of induction variables. We do simple
4338 greedy heuristic -- we try to replace at most one candidate in the selected
4339 solution and remove the unused ivs while this improves the cost. */
4341 static struct iv_ca *
4342 find_optimal_iv_set (struct ivopts_data *data)
4344 unsigned i;
4345 struct iv_ca *set;
4346 struct iv_use *use;
4348 /* Get the initial solution. */
4349 set = get_initial_solution (data);
4350 if (!set)
4352 if (dump_file && (dump_flags & TDF_DETAILS))
4353 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4354 return NULL;
4357 if (dump_file && (dump_flags & TDF_DETAILS))
4359 fprintf (dump_file, "Initial set of candidates:\n");
4360 iv_ca_dump (data, dump_file, set);
4363 while (try_improve_iv_set (data, set))
4365 if (dump_file && (dump_flags & TDF_DETAILS))
4367 fprintf (dump_file, "Improved to:\n");
4368 iv_ca_dump (data, dump_file, set);
4372 if (dump_file && (dump_flags & TDF_DETAILS))
4373 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
4375 for (i = 0; i < n_iv_uses (data); i++)
4377 use = iv_use (data, i);
4378 use->selected = iv_ca_cand_for_use (set, use)->cand;
4381 return set;
4384 /* Creates a new induction variable corresponding to CAND. */
4386 static void
4387 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4389 block_stmt_iterator incr_pos;
4390 tree base;
4391 bool after = false;
4393 if (!cand->iv)
4394 return;
4396 switch (cand->pos)
4398 case IP_NORMAL:
4399 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4400 break;
4402 case IP_END:
4403 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4404 after = true;
4405 break;
4407 case IP_ORIGINAL:
4408 /* Mark that the iv is preserved. */
4409 name_info (data, cand->var_before)->preserve_biv = true;
4410 name_info (data, cand->var_after)->preserve_biv = true;
4412 /* Rewrite the increment so that it uses var_before directly. */
4413 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4415 return;
4418 gimple_add_tmp_var (cand->var_before);
4419 add_referenced_tmp_var (cand->var_before);
4421 base = unshare_expr (cand->iv->base);
4423 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
4424 &incr_pos, after, &cand->var_before, &cand->var_after);
4427 /* Creates new induction variables described in SET. */
4429 static void
4430 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4432 unsigned i;
4433 struct iv_cand *cand;
4434 bitmap_iterator bi;
4436 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4438 cand = iv_cand (data, i);
4439 create_new_iv (data, cand);
4443 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4444 is true, remove also the ssa name defined by the statement. */
4446 static void
4447 remove_statement (tree stmt, bool including_defined_name)
4449 if (TREE_CODE (stmt) == PHI_NODE)
4451 if (!including_defined_name)
4453 /* Prevent the ssa name defined by the statement from being removed. */
4454 SET_PHI_RESULT (stmt, NULL);
4456 remove_phi_node (stmt, NULL_TREE, bb_for_stmt (stmt));
4458 else
4460 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4462 bsi_remove (&bsi);
4466 /* Rewrites USE (definition of iv used in a nonlinear expression)
4467 using candidate CAND. */
4469 static void
4470 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4471 struct iv_use *use, struct iv_cand *cand)
4473 tree comp = unshare_expr (get_computation (data->current_loop,
4474 use, cand));
4475 tree op, stmts, tgt, ass;
4476 block_stmt_iterator bsi, pbsi;
4478 switch (TREE_CODE (use->stmt))
4480 case PHI_NODE:
4481 tgt = PHI_RESULT (use->stmt);
4483 /* If we should keep the biv, do not replace it. */
4484 if (name_info (data, tgt)->preserve_biv)
4485 return;
4487 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4488 while (!bsi_end_p (pbsi)
4489 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4491 bsi = pbsi;
4492 bsi_next (&pbsi);
4494 break;
4496 case MODIFY_EXPR:
4497 tgt = TREE_OPERAND (use->stmt, 0);
4498 bsi = bsi_for_stmt (use->stmt);
4499 break;
4501 default:
4502 gcc_unreachable ();
4505 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4507 if (TREE_CODE (use->stmt) == PHI_NODE)
4509 if (stmts)
4510 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4511 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
4512 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4513 remove_statement (use->stmt, false);
4514 SSA_NAME_DEF_STMT (tgt) = ass;
4516 else
4518 if (stmts)
4519 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4520 TREE_OPERAND (use->stmt, 1) = op;
4524 /* Replaces ssa name in index IDX by its basic variable. Callback for
4525 for_each_index. */
4527 static bool
4528 idx_remove_ssa_names (tree base, tree *idx,
4529 void *data ATTRIBUTE_UNUSED)
4531 tree *op;
4533 if (TREE_CODE (*idx) == SSA_NAME)
4534 *idx = SSA_NAME_VAR (*idx);
4536 if (TREE_CODE (base) == ARRAY_REF)
4538 op = &TREE_OPERAND (base, 2);
4539 if (*op
4540 && TREE_CODE (*op) == SSA_NAME)
4541 *op = SSA_NAME_VAR (*op);
4542 op = &TREE_OPERAND (base, 3);
4543 if (*op
4544 && TREE_CODE (*op) == SSA_NAME)
4545 *op = SSA_NAME_VAR (*op);
4548 return true;
4551 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4553 static tree
4554 unshare_and_remove_ssa_names (tree ref)
4556 ref = unshare_expr (ref);
4557 for_each_index (&ref, idx_remove_ssa_names, NULL);
4559 return ref;
4562 /* Rewrites base of memory access OP with expression WITH in statement
4563 pointed to by BSI. */
4565 static void
4566 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
4568 tree bvar, var, new_var, new_name, copy, name;
4569 tree orig;
4571 var = bvar = get_base_address (*op);
4573 if (!var || TREE_CODE (with) != SSA_NAME)
4574 goto do_rewrite;
4576 gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
4577 gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
4578 if (TREE_CODE (var) == INDIRECT_REF)
4579 var = TREE_OPERAND (var, 0);
4580 if (TREE_CODE (var) == SSA_NAME)
4582 name = var;
4583 var = SSA_NAME_VAR (var);
4585 else if (DECL_P (var))
4586 name = NULL_TREE;
4587 else
4588 goto do_rewrite;
4590 if (var_ann (var)->type_mem_tag)
4591 var = var_ann (var)->type_mem_tag;
4593 /* We need to add a memory tag for the variable. But we do not want
4594 to add it to the temporary used for the computations, since this leads
4595 to problems in redundancy elimination when there are common parts
4596 in two computations referring to the different arrays. So we copy
4597 the variable to a new temporary. */
4598 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
4599 if (name)
4600 new_name = duplicate_ssa_name (name, copy);
4601 else
4603 new_var = create_tmp_var (TREE_TYPE (with), "ruatmp");
4604 add_referenced_tmp_var (new_var);
4605 var_ann (new_var)->type_mem_tag = var;
4606 new_name = make_ssa_name (new_var, copy);
4608 TREE_OPERAND (copy, 0) = new_name;
4609 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
4610 with = new_name;
4612 do_rewrite:
4614 orig = NULL_TREE;
4615 gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
4616 gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
4618 if (TREE_CODE (*op) == INDIRECT_REF)
4619 orig = REF_ORIGINAL (*op);
4620 if (!orig)
4621 orig = unshare_and_remove_ssa_names (*op);
4623 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
4625 /* Record the original reference, for purposes of alias analysis. */
4626 REF_ORIGINAL (*op) = orig;
4629 /* Rewrites USE (address that is an iv) using candidate CAND. */
4631 static void
4632 rewrite_use_address (struct ivopts_data *data,
4633 struct iv_use *use, struct iv_cand *cand)
4635 tree comp = unshare_expr (get_computation (data->current_loop,
4636 use, cand));
4637 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4638 tree stmts;
4639 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
4641 if (stmts)
4642 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4644 rewrite_address_base (&bsi, use->op_p, op);
4647 /* Rewrites USE (the condition such that one of the arguments is an iv) using
4648 candidate CAND. */
4650 static void
4651 rewrite_use_compare (struct ivopts_data *data,
4652 struct iv_use *use, struct iv_cand *cand)
4654 tree comp;
4655 tree *op_p, cond, op, stmts, bound;
4656 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4657 enum tree_code compare;
4659 if (may_eliminate_iv (data->current_loop,
4660 use, cand, &compare, &bound))
4662 op = force_gimple_operand (unshare_expr (bound), &stmts,
4663 true, NULL_TREE);
4665 if (stmts)
4666 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4668 *use->op_p = build2 (compare, boolean_type_node,
4669 var_at_stmt (data->current_loop,
4670 cand, use->stmt), op);
4671 modify_stmt (use->stmt);
4672 return;
4675 /* The induction variable elimination failed; just express the original
4676 giv. */
4677 comp = unshare_expr (get_computation (data->current_loop, use, cand));
4679 cond = *use->op_p;
4680 op_p = &TREE_OPERAND (cond, 0);
4681 if (TREE_CODE (*op_p) != SSA_NAME
4682 || zero_p (get_iv (data, *op_p)->step))
4683 op_p = &TREE_OPERAND (cond, 1);
4685 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
4686 if (stmts)
4687 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4689 *op_p = op;
4692 /* Ensure that operand *OP_P may be used at the end of EXIT without
4693 violating loop closed ssa form. */
4695 static void
4696 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
4698 basic_block def_bb;
4699 struct loop *def_loop;
4700 tree phi, use;
4702 use = USE_FROM_PTR (op_p);
4703 if (TREE_CODE (use) != SSA_NAME)
4704 return;
4706 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
4707 if (!def_bb)
4708 return;
4710 def_loop = def_bb->loop_father;
4711 if (flow_bb_inside_loop_p (def_loop, exit->dest))
4712 return;
4714 /* Try finding a phi node that copies the value out of the loop. */
4715 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
4716 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
4717 break;
4719 if (!phi)
4721 /* Create such a phi node. */
4722 tree new_name = duplicate_ssa_name (use, NULL);
4724 phi = create_phi_node (new_name, exit->dest);
4725 SSA_NAME_DEF_STMT (new_name) = phi;
4726 add_phi_arg (phi, use, exit);
4729 SET_USE (op_p, PHI_RESULT (phi));
4732 /* Ensure that operands of STMT may be used at the end of EXIT without
4733 violating loop closed ssa form. */
4735 static void
4736 protect_loop_closed_ssa_form (edge exit, tree stmt)
4738 use_optype uses;
4739 vuse_optype vuses;
4740 v_may_def_optype v_may_defs;
4741 unsigned i;
4743 get_stmt_operands (stmt);
4745 uses = STMT_USE_OPS (stmt);
4746 for (i = 0; i < NUM_USES (uses); i++)
4747 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4749 vuses = STMT_VUSE_OPS (stmt);
4750 for (i = 0; i < NUM_VUSES (vuses); i++)
4751 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
4753 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4754 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
4755 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
4758 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
4759 so that they are emitted on the correct place, and so that the loop closed
4760 ssa form is preserved. */
4762 static void
4763 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
4765 tree_stmt_iterator tsi;
4766 block_stmt_iterator bsi;
4767 tree phi, stmt, def, next;
4769 if (EDGE_COUNT (exit->dest->preds) > 1)
4770 split_loop_exit_edge (exit);
4772 if (TREE_CODE (stmts) == STATEMENT_LIST)
4774 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
4775 protect_loop_closed_ssa_form (exit, tsi_stmt (tsi));
4777 else
4778 protect_loop_closed_ssa_form (exit, stmts);
4780 /* Ensure there is label in exit->dest, so that we can
4781 insert after it. */
4782 tree_block_label (exit->dest);
4783 bsi = bsi_after_labels (exit->dest);
4784 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4786 if (!op)
4787 return;
4789 for (phi = phi_nodes (exit->dest); phi; phi = next)
4791 next = PHI_CHAIN (phi);
4793 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
4795 def = PHI_RESULT (phi);
4796 remove_statement (phi, false);
4797 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
4798 def, op);
4799 SSA_NAME_DEF_STMT (def) = stmt;
4800 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
4805 /* Rewrites the final value of USE (that is only needed outside of the loop)
4806 using candidate CAND. */
4808 static void
4809 rewrite_use_outer (struct ivopts_data *data,
4810 struct iv_use *use, struct iv_cand *cand)
4812 edge exit;
4813 tree value, op, stmts, tgt;
4814 tree phi;
4816 switch (TREE_CODE (use->stmt))
4818 case PHI_NODE:
4819 tgt = PHI_RESULT (use->stmt);
4820 break;
4821 case MODIFY_EXPR:
4822 tgt = TREE_OPERAND (use->stmt, 0);
4823 break;
4824 default:
4825 gcc_unreachable ();
4828 exit = single_dom_exit (data->current_loop);
4830 if (exit)
4832 if (!cand->iv)
4834 bool ok = may_replace_final_value (data->current_loop, use, &value);
4835 gcc_assert (ok);
4837 else
4838 value = get_computation_at (data->current_loop,
4839 use, cand, last_stmt (exit->src));
4841 value = unshare_expr (value);
4842 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
4844 /* If we will preserve the iv anyway and we would need to perform
4845 some computation to replace the final value, do nothing. */
4846 if (stmts && name_info (data, tgt)->preserve_biv)
4847 return;
4849 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
4851 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
4853 if (USE_FROM_PTR (use_p) == tgt)
4854 SET_USE (use_p, op);
4857 if (stmts)
4858 compute_phi_arg_on_exit (exit, stmts, op);
4860 /* Enable removal of the statement. We cannot remove it directly,
4861 since we may still need the aliasing information attached to the
4862 ssa name defined by it. */
4863 name_info (data, tgt)->iv->have_use_for = false;
4864 return;
4867 /* If the variable is going to be preserved anyway, there is nothing to
4868 do. */
4869 if (name_info (data, tgt)->preserve_biv)
4870 return;
4872 /* Otherwise we just need to compute the iv. */
4873 rewrite_use_nonlinear_expr (data, use, cand);
4876 /* Rewrites USE using candidate CAND. */
4878 static void
4879 rewrite_use (struct ivopts_data *data,
4880 struct iv_use *use, struct iv_cand *cand)
4882 switch (use->type)
4884 case USE_NONLINEAR_EXPR:
4885 rewrite_use_nonlinear_expr (data, use, cand);
4886 break;
4888 case USE_OUTER:
4889 rewrite_use_outer (data, use, cand);
4890 break;
4892 case USE_ADDRESS:
4893 rewrite_use_address (data, use, cand);
4894 break;
4896 case USE_COMPARE:
4897 rewrite_use_compare (data, use, cand);
4898 break;
4900 default:
4901 gcc_unreachable ();
4903 modify_stmt (use->stmt);
4906 /* Rewrite the uses using the selected induction variables. */
4908 static void
4909 rewrite_uses (struct ivopts_data *data)
4911 unsigned i;
4912 struct iv_cand *cand;
4913 struct iv_use *use;
4915 for (i = 0; i < n_iv_uses (data); i++)
4917 use = iv_use (data, i);
4918 cand = use->selected;
4919 gcc_assert (cand);
4921 rewrite_use (data, use, cand);
4925 /* Removes the ivs that are not used after rewriting. */
4927 static void
4928 remove_unused_ivs (struct ivopts_data *data)
4930 unsigned j;
4931 bitmap_iterator bi;
4933 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4935 struct version_info *info;
4937 info = ver_info (data, j);
4938 if (info->iv
4939 && !zero_p (info->iv->step)
4940 && !info->inv_id
4941 && !info->iv->have_use_for
4942 && !info->preserve_biv)
4943 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
4947 /* Frees data allocated by the optimization of a single loop. */
4949 static void
4950 free_loop_data (struct ivopts_data *data)
4952 unsigned i, j;
4953 bitmap_iterator bi;
4955 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
4957 struct version_info *info;
4959 info = ver_info (data, i);
4960 if (info->iv)
4961 free (info->iv);
4962 info->iv = NULL;
4963 info->has_nonlin_use = false;
4964 info->preserve_biv = false;
4965 info->inv_id = 0;
4967 bitmap_clear (data->relevant);
4968 bitmap_clear (data->important_candidates);
4970 for (i = 0; i < n_iv_uses (data); i++)
4972 struct iv_use *use = iv_use (data, i);
4974 free (use->iv);
4975 BITMAP_XFREE (use->related_cands);
4976 for (j = 0; j < use->n_map_members; j++)
4977 if (use->cost_map[j].depends_on)
4978 BITMAP_XFREE (use->cost_map[j].depends_on);
4979 free (use->cost_map);
4980 free (use);
4982 VARRAY_POP_ALL (data->iv_uses);
4984 for (i = 0; i < n_iv_cands (data); i++)
4986 struct iv_cand *cand = iv_cand (data, i);
4988 if (cand->iv)
4989 free (cand->iv);
4990 free (cand);
4992 VARRAY_POP_ALL (data->iv_candidates);
4994 if (data->version_info_size < num_ssa_names)
4996 data->version_info_size = 2 * num_ssa_names;
4997 free (data->version_info);
4998 data->version_info = xcalloc (data->version_info_size,
4999 sizeof (struct version_info));
5002 data->max_inv_id = 0;
5004 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
5006 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
5008 SET_DECL_RTL (obj, NULL_RTX);
5010 VARRAY_POP_ALL (decl_rtl_to_reset);
5013 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5014 loop tree. */
5016 static void
5017 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5019 unsigned i;
5021 for (i = 1; i < loops->num; i++)
5022 if (loops->parray[i])
5024 free (loops->parray[i]->aux);
5025 loops->parray[i]->aux = NULL;
5028 free_loop_data (data);
5029 free (data->version_info);
5030 BITMAP_XFREE (data->relevant);
5031 BITMAP_XFREE (data->important_candidates);
5033 VARRAY_FREE (decl_rtl_to_reset);
5034 VARRAY_FREE (data->iv_uses);
5035 VARRAY_FREE (data->iv_candidates);
5038 /* Optimizes the LOOP. Returns true if anything changed. */
5040 static bool
5041 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5043 bool changed = false;
5044 struct iv_ca *iv_ca;
5045 edge exit;
5047 data->current_loop = loop;
5049 if (dump_file && (dump_flags & TDF_DETAILS))
5051 fprintf (dump_file, "Processing loop %d\n", loop->num);
5053 exit = single_dom_exit (loop);
5054 if (exit)
5056 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5057 exit->src->index, exit->dest->index);
5058 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5059 fprintf (dump_file, "\n");
5062 fprintf (dump_file, "\n");
5065 /* For each ssa name determines whether it behaves as an induction variable
5066 in some loop. */
5067 if (!find_induction_variables (data))
5068 goto finish;
5070 /* Finds interesting uses (item 1). */
5071 find_interesting_uses (data);
5072 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5073 goto finish;
5075 /* Finds candidates for the induction variables (item 2). */
5076 find_iv_candidates (data);
5078 /* Calculates the costs (item 3, part 1). */
5079 determine_use_iv_costs (data);
5080 determine_iv_costs (data);
5081 determine_set_costs (data);
5083 /* Find the optimal set of induction variables (item 3, part 2). */
5084 iv_ca = find_optimal_iv_set (data);
5085 if (!iv_ca)
5086 goto finish;
5087 changed = true;
5089 /* Create the new induction variables (item 4, part 1). */
5090 create_new_ivs (data, iv_ca);
5091 iv_ca_free (&iv_ca);
5093 /* Rewrite the uses (item 4, part 2). */
5094 rewrite_uses (data);
5096 /* Remove the ivs that are unused after rewriting. */
5097 remove_unused_ivs (data);
5099 loop_commit_inserts ();
5101 /* We have changed the structure of induction variables; it might happen
5102 that definitions in the scev database refer to some of them that were
5103 eliminated. */
5104 scev_reset ();
5106 finish:
5107 free_loop_data (data);
5109 return changed;
5112 /* Main entry point. Optimizes induction variables in LOOPS. */
5114 void
5115 tree_ssa_iv_optimize (struct loops *loops)
5117 struct loop *loop;
5118 struct ivopts_data data;
5120 tree_ssa_iv_optimize_init (loops, &data);
5122 /* Optimize the loops starting with the innermost ones. */
5123 loop = loops->tree_root;
5124 while (loop->inner)
5125 loop = loop->inner;
5127 #ifdef ENABLE_CHECKING
5128 verify_loop_closed_ssa ();
5129 verify_stmts ();
5130 #endif
5132 /* Scan the loops, inner ones first. */
5133 while (loop != loops->tree_root)
5135 if (dump_file && (dump_flags & TDF_DETAILS))
5136 flow_loop_dump (loop, dump_file, NULL, 1);
5138 tree_ssa_iv_optimize_loop (&data, loop);
5140 if (loop->next)
5142 loop = loop->next;
5143 while (loop->inner)
5144 loop = loop->inner;
5146 else
5147 loop = loop->outer;
5150 #ifdef ENABLE_CHECKING
5151 verify_loop_closed_ssa ();
5152 verify_stmts ();
5153 #endif
5155 tree_ssa_iv_optimize_finalize (loops, &data);