* gcse.c: Remove an obsolete comment.
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blobd1adbabc07b7be0e1c3e2e749a5cc77c0ed0ffa8
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
92 /* The infinite cost. */
93 #define INFTY 10000000
95 /* The expected number of loop iterations. TODO -- use profiling instead of
96 this. */
97 #define AVG_LOOP_NITER(LOOP) 5
100 /* Representation of the induction variable. */
101 struct iv
103 tree base; /* Initial value of the iv. */
104 tree base_object; /* A memory object to that the induction variable points. */
105 tree step; /* Step of the iv (constant only). */
106 tree ssa_name; /* The ssa name with the value. */
107 bool biv_p; /* Is it a biv? */
108 bool have_use_for; /* Do we already have a use for it? */
109 unsigned use_id; /* The identifier in the use if it is the case. */
112 /* Per-ssa version information (induction variable descriptions, etc.). */
113 struct version_info
115 tree name; /* The ssa name. */
116 struct iv *iv; /* Induction variable description. */
117 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
118 an expression that is not an induction variable. */
119 unsigned inv_id; /* Id of an invariant. */
120 bool preserve_biv; /* For the original biv, whether to preserve it. */
123 /* Information attached to loop. */
124 struct loop_data
126 unsigned regs_used; /* Number of registers used. */
129 /* Types of uses. */
130 enum use_type
132 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
133 USE_OUTER, /* The induction variable is used outside the loop. */
134 USE_ADDRESS, /* Use in an address. */
135 USE_COMPARE /* Use is a compare. */
138 /* The candidate - cost pair. */
139 struct cost_pair
141 struct iv_cand *cand; /* The candidate. */
142 unsigned cost; /* The cost. */
143 bitmap depends_on; /* The list of invariants that have to be
144 preserved. */
147 /* Use. */
148 struct iv_use
150 unsigned id; /* The id of the use. */
151 enum use_type type; /* Type of the use. */
152 struct iv *iv; /* The induction variable it is based on. */
153 tree stmt; /* Statement in that it occurs. */
154 tree *op_p; /* The place where it occurs. */
155 bitmap related_cands; /* The set of "related" iv candidates, plus the common
156 important ones. */
158 unsigned n_map_members; /* Number of candidates in the cost_map list. */
159 struct cost_pair *cost_map;
160 /* The costs wrto the iv candidates. */
162 struct iv_cand *selected;
163 /* The selected candidate. */
166 /* The position where the iv is computed. */
167 enum iv_position
169 IP_NORMAL, /* At the end, just before the exit condition. */
170 IP_END, /* At the end of the latch block. */
171 IP_ORIGINAL /* The original biv. */
174 /* The induction variable candidate. */
175 struct iv_cand
177 unsigned id; /* The number of the candidate. */
178 bool important; /* Whether this is an "important" candidate, i.e. such
179 that it should be considered by all uses. */
180 enum iv_position pos; /* Where it is computed. */
181 tree incremented_at; /* For original biv, the statement where it is
182 incremented. */
183 tree var_before; /* The variable used for it before increment. */
184 tree var_after; /* The variable used for it after increment. */
185 struct iv *iv; /* The value of the candidate. NULL for
186 "pseudocandidate" used to indicate the possibility
187 to replace the final value of an iv by direct
188 computation of the value. */
189 unsigned cost; /* Cost of the candidate. */
192 /* The data used by the induction variable optimizations. */
194 struct ivopts_data
196 /* The currently optimized loop. */
197 struct loop *current_loop;
199 /* Numbers of iterations for all exits of the current loop. */
200 htab_t niters;
202 /* The size of version_info array allocated. */
203 unsigned version_info_size;
205 /* The array of information for the ssa names. */
206 struct version_info *version_info;
208 /* The bitmap of indices in version_info whose value was changed. */
209 bitmap relevant;
211 /* The maximum invariant id. */
212 unsigned max_inv_id;
214 /* The uses of induction variables. */
215 varray_type iv_uses;
217 /* The candidates. */
218 varray_type iv_candidates;
220 /* A bitmap of important candidates. */
221 bitmap important_candidates;
223 /* Whether to consider just related and important candidates when replacing a
224 use. */
225 bool consider_all_candidates;
228 /* An assignment of iv candidates to uses. */
230 struct iv_ca
232 /* The number of uses covered by the assignment. */
233 unsigned upto;
235 /* Number of uses that cannot be expressed by the candidates in the set. */
236 unsigned bad_uses;
238 /* Candidate assigned to a use, together with the related costs. */
239 struct cost_pair **cand_for_use;
241 /* Number of times each candidate is used. */
242 unsigned *n_cand_uses;
244 /* The candidates used. */
245 bitmap cands;
247 /* The number of candidates in the set. */
248 unsigned n_cands;
250 /* Total number of registers needed. */
251 unsigned n_regs;
253 /* Total cost of expressing uses. */
254 unsigned cand_use_cost;
256 /* Total cost of candidates. */
257 unsigned cand_cost;
259 /* Number of times each invariant is used. */
260 unsigned *n_invariant_uses;
262 /* Total cost of the assignment. */
263 unsigned cost;
266 /* Difference of two iv candidate assignments. */
268 struct iv_ca_delta
270 /* Changed use. */
271 struct iv_use *use;
273 /* An old assignment (for rollback purposes). */
274 struct cost_pair *old_cp;
276 /* A new assignment. */
277 struct cost_pair *new_cp;
279 /* Next change in the list. */
280 struct iv_ca_delta *next_change;
283 /* Bound on number of candidates below that all candidates are considered. */
285 #define CONSIDER_ALL_CANDIDATES_BOUND \
286 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
288 /* If there are more iv occurrences, we just give up (it is quite unlikely that
289 optimizing such a loop would help, and it would take ages). */
291 #define MAX_CONSIDERED_USES \
292 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
294 /* If there are at most this number of ivs in the set, try removing unnecessary
295 ivs from the set always. */
297 #define ALWAYS_PRUNE_CAND_SET_BOUND \
298 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
300 /* The list of trees for that the decl_rtl field must be reset is stored
301 here. */
303 static varray_type decl_rtl_to_reset;
305 /* Number of uses recorded in DATA. */
307 static inline unsigned
308 n_iv_uses (struct ivopts_data *data)
310 return VARRAY_ACTIVE_SIZE (data->iv_uses);
313 /* Ith use recorded in DATA. */
315 static inline struct iv_use *
316 iv_use (struct ivopts_data *data, unsigned i)
318 return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
321 /* Number of candidates recorded in DATA. */
323 static inline unsigned
324 n_iv_cands (struct ivopts_data *data)
326 return VARRAY_ACTIVE_SIZE (data->iv_candidates);
329 /* Ith candidate recorded in DATA. */
331 static inline struct iv_cand *
332 iv_cand (struct ivopts_data *data, unsigned i)
334 return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
337 /* The data for LOOP. */
339 static inline struct loop_data *
340 loop_data (struct loop *loop)
342 return loop->aux;
345 /* The single loop exit if it dominates the latch, NULL otherwise. */
347 static edge
348 single_dom_exit (struct loop *loop)
350 edge exit = loop->single_exit;
352 if (!exit)
353 return NULL;
355 if (!just_once_each_iteration_p (loop, exit->src))
356 return NULL;
358 return exit;
361 /* Dumps information about the induction variable IV to FILE. */
363 extern void dump_iv (FILE *, struct iv *);
364 void
365 dump_iv (FILE *file, struct iv *iv)
367 if (iv->ssa_name)
369 fprintf (file, "ssa name ");
370 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
371 fprintf (file, "\n");
374 fprintf (file, " type ");
375 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
376 fprintf (file, "\n");
378 if (iv->step)
380 fprintf (file, " base ");
381 print_generic_expr (file, iv->base, TDF_SLIM);
382 fprintf (file, "\n");
384 fprintf (file, " step ");
385 print_generic_expr (file, iv->step, TDF_SLIM);
386 fprintf (file, "\n");
388 else
390 fprintf (file, " invariant ");
391 print_generic_expr (file, iv->base, TDF_SLIM);
392 fprintf (file, "\n");
395 if (iv->base_object)
397 fprintf (file, " base object ");
398 print_generic_expr (file, iv->base_object, TDF_SLIM);
399 fprintf (file, "\n");
402 if (iv->biv_p)
403 fprintf (file, " is a biv\n");
406 /* Dumps information about the USE to FILE. */
408 extern void dump_use (FILE *, struct iv_use *);
409 void
410 dump_use (FILE *file, struct iv_use *use)
412 fprintf (file, "use %d\n", use->id);
414 switch (use->type)
416 case USE_NONLINEAR_EXPR:
417 fprintf (file, " generic\n");
418 break;
420 case USE_OUTER:
421 fprintf (file, " outside\n");
422 break;
424 case USE_ADDRESS:
425 fprintf (file, " address\n");
426 break;
428 case USE_COMPARE:
429 fprintf (file, " compare\n");
430 break;
432 default:
433 gcc_unreachable ();
436 fprintf (file, " in statement ");
437 print_generic_expr (file, use->stmt, TDF_SLIM);
438 fprintf (file, "\n");
440 fprintf (file, " at position ");
441 if (use->op_p)
442 print_generic_expr (file, *use->op_p, TDF_SLIM);
443 fprintf (file, "\n");
445 dump_iv (file, use->iv);
447 if (use->related_cands)
449 fprintf (file, " related candidates ");
450 dump_bitmap (file, use->related_cands);
454 /* Dumps information about the uses to FILE. */
456 extern void dump_uses (FILE *, struct ivopts_data *);
457 void
458 dump_uses (FILE *file, struct ivopts_data *data)
460 unsigned i;
461 struct iv_use *use;
463 for (i = 0; i < n_iv_uses (data); i++)
465 use = iv_use (data, i);
467 dump_use (file, use);
468 fprintf (file, "\n");
472 /* Dumps information about induction variable candidate CAND to FILE. */
474 extern void dump_cand (FILE *, struct iv_cand *);
475 void
476 dump_cand (FILE *file, struct iv_cand *cand)
478 struct iv *iv = cand->iv;
480 fprintf (file, "candidate %d%s\n",
481 cand->id, cand->important ? " (important)" : "");
483 if (!iv)
485 fprintf (file, " final value replacement\n");
486 return;
489 switch (cand->pos)
491 case IP_NORMAL:
492 fprintf (file, " incremented before exit test\n");
493 break;
495 case IP_END:
496 fprintf (file, " incremented at end\n");
497 break;
499 case IP_ORIGINAL:
500 fprintf (file, " original biv\n");
501 break;
504 dump_iv (file, iv);
507 /* Returns the info for ssa version VER. */
509 static inline struct version_info *
510 ver_info (struct ivopts_data *data, unsigned ver)
512 return data->version_info + ver;
515 /* Returns the info for ssa name NAME. */
517 static inline struct version_info *
518 name_info (struct ivopts_data *data, tree name)
520 return ver_info (data, SSA_NAME_VERSION (name));
523 /* Checks whether there exists number X such that X * B = A, counting modulo
524 2^BITS. */
526 static bool
527 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
528 HOST_WIDE_INT *x)
530 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
531 unsigned HOST_WIDE_INT inv, ex, val;
532 unsigned i;
534 a &= mask;
535 b &= mask;
537 /* First divide the whole equation by 2 as long as possible. */
538 while (!(a & 1) && !(b & 1))
540 a >>= 1;
541 b >>= 1;
542 bits--;
543 mask >>= 1;
546 if (!(b & 1))
548 /* If b is still even, a is odd and there is no such x. */
549 return false;
552 /* Find the inverse of b. We compute it as
553 b^(2^(bits - 1) - 1) (mod 2^bits). */
554 inv = 1;
555 ex = b;
556 for (i = 0; i < bits - 1; i++)
558 inv = (inv * ex) & mask;
559 ex = (ex * ex) & mask;
562 val = (a * inv) & mask;
564 gcc_assert (((val * b) & mask) == a);
566 if ((val >> (bits - 1)) & 1)
567 val |= ~mask;
569 *x = val;
571 return true;
574 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
575 emitted in LOOP. */
577 static bool
578 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
580 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
582 gcc_assert (bb);
584 if (sbb == loop->latch)
585 return true;
587 if (sbb != bb)
588 return false;
590 return stmt == last_stmt (bb);
593 /* Returns true if STMT if after the place where the original induction
594 variable CAND is incremented. */
596 static bool
597 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
599 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
600 basic_block stmt_bb = bb_for_stmt (stmt);
601 block_stmt_iterator bsi;
603 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
604 return false;
606 if (stmt_bb != cand_bb)
607 return true;
609 /* Scan the block from the end, since the original ivs are usually
610 incremented at the end of the loop body. */
611 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
613 if (bsi_stmt (bsi) == cand->incremented_at)
614 return false;
615 if (bsi_stmt (bsi) == stmt)
616 return true;
620 /* Returns true if STMT if after the place where the induction variable
621 CAND is incremented in LOOP. */
623 static bool
624 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
626 switch (cand->pos)
628 case IP_END:
629 return false;
631 case IP_NORMAL:
632 return stmt_after_ip_normal_pos (loop, stmt);
634 case IP_ORIGINAL:
635 return stmt_after_ip_original_pos (cand, stmt);
637 default:
638 gcc_unreachable ();
642 /* Element of the table in that we cache the numbers of iterations obtained
643 from exits of the loop. */
645 struct nfe_cache_elt
647 /* The edge for that the number of iterations is cached. */
648 edge exit;
650 /* True if the # of iterations was successfully determined. */
651 bool valid_p;
653 /* Description of # of iterations. */
654 struct tree_niter_desc niter;
657 /* Hash function for nfe_cache_elt E. */
659 static hashval_t
660 nfe_hash (const void *e)
662 const struct nfe_cache_elt *elt = e;
664 return htab_hash_pointer (elt->exit);
667 /* Equality function for nfe_cache_elt E1 and edge E2. */
669 static int
670 nfe_eq (const void *e1, const void *e2)
672 const struct nfe_cache_elt *elt1 = e1;
674 return elt1->exit == e2;
677 /* Returns structure describing number of iterations determined from
678 EXIT of DATA->current_loop, or NULL if something goes wrong. */
680 static struct tree_niter_desc *
681 niter_for_exit (struct ivopts_data *data, edge exit)
683 struct nfe_cache_elt *nfe_desc;
684 PTR *slot;
686 slot = htab_find_slot_with_hash (data->niters, exit,
687 htab_hash_pointer (exit),
688 INSERT);
690 if (!*slot)
692 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
693 nfe_desc->exit = exit;
694 nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
695 exit, &nfe_desc->niter);
696 *slot = nfe_desc;
698 else
699 nfe_desc = *slot;
701 if (!nfe_desc->valid_p)
702 return NULL;
704 return &nfe_desc->niter;
707 /* Returns structure describing number of iterations determined from
708 single dominating exit of DATA->current_loop, or NULL if something
709 goes wrong. */
711 static struct tree_niter_desc *
712 niter_for_single_dom_exit (struct ivopts_data *data)
714 edge exit = single_dom_exit (data->current_loop);
716 if (!exit)
717 return NULL;
719 return niter_for_exit (data, exit);
722 /* Initializes data structures used by the iv optimization pass, stored
723 in DATA. LOOPS is the loop tree. */
725 static void
726 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
728 unsigned i;
730 data->version_info_size = 2 * num_ssa_names;
731 data->version_info = xcalloc (data->version_info_size,
732 sizeof (struct version_info));
733 data->relevant = BITMAP_ALLOC (NULL);
734 data->important_candidates = BITMAP_ALLOC (NULL);
735 data->max_inv_id = 0;
736 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
738 for (i = 1; i < loops->num; i++)
739 if (loops->parray[i])
740 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
742 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
743 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
744 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
747 /* Returns a memory object to that EXPR points. In case we are able to
748 determine that it does not point to any such object, NULL is returned. */
750 static tree
751 determine_base_object (tree expr)
753 enum tree_code code = TREE_CODE (expr);
754 tree base, obj, op0, op1;
756 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
757 return NULL_TREE;
759 switch (code)
761 case INTEGER_CST:
762 return NULL_TREE;
764 case ADDR_EXPR:
765 obj = TREE_OPERAND (expr, 0);
766 base = get_base_address (obj);
768 if (!base)
769 return expr;
771 if (TREE_CODE (base) == INDIRECT_REF)
772 return determine_base_object (TREE_OPERAND (base, 0));
774 return fold (build1 (ADDR_EXPR, ptr_type_node, base));
776 case PLUS_EXPR:
777 case MINUS_EXPR:
778 op0 = determine_base_object (TREE_OPERAND (expr, 0));
779 op1 = determine_base_object (TREE_OPERAND (expr, 1));
781 if (!op1)
782 return op0;
784 if (!op0)
785 return (code == PLUS_EXPR
786 ? op1
787 : fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));
789 return fold (build (code, ptr_type_node, op0, op1));
791 case NOP_EXPR:
792 case CONVERT_EXPR:
793 return determine_base_object (TREE_OPERAND (expr, 0));
795 default:
796 return fold_convert (ptr_type_node, expr);
800 /* Allocates an induction variable with given initial value BASE and step STEP
801 for loop LOOP. */
803 static struct iv *
804 alloc_iv (tree base, tree step)
806 struct iv *iv = xcalloc (1, sizeof (struct iv));
808 if (step && integer_zerop (step))
809 step = NULL_TREE;
811 iv->base = base;
812 iv->base_object = determine_base_object (base);
813 iv->step = step;
814 iv->biv_p = false;
815 iv->have_use_for = false;
816 iv->use_id = 0;
817 iv->ssa_name = NULL_TREE;
819 return iv;
822 /* Sets STEP and BASE for induction variable IV. */
824 static void
825 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
827 struct version_info *info = name_info (data, iv);
829 gcc_assert (!info->iv);
831 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
832 info->iv = alloc_iv (base, step);
833 info->iv->ssa_name = iv;
836 /* Finds induction variable declaration for VAR. */
838 static struct iv *
839 get_iv (struct ivopts_data *data, tree var)
841 basic_block bb;
843 if (!name_info (data, var)->iv)
845 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
847 if (!bb
848 || !flow_bb_inside_loop_p (data->current_loop, bb))
849 set_iv (data, var, var, NULL_TREE);
852 return name_info (data, var)->iv;
855 /* Determines the step of a biv defined in PHI. */
857 static tree
858 determine_biv_step (tree phi)
860 struct loop *loop = bb_for_stmt (phi)->loop_father;
861 tree name = PHI_RESULT (phi), base, step;
862 tree type = TREE_TYPE (name);
864 if (!is_gimple_reg (name))
865 return NULL_TREE;
867 if (!simple_iv (loop, phi, name, &base, &step))
868 return NULL_TREE;
870 if (!step)
871 return build_int_cst (type, 0);
873 return step;
876 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
878 static bool
879 abnormal_ssa_name_p (tree exp)
881 if (!exp)
882 return false;
884 if (TREE_CODE (exp) != SSA_NAME)
885 return false;
887 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
890 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
891 abnormal phi node. Callback for for_each_index. */
893 static bool
894 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
895 void *data ATTRIBUTE_UNUSED)
897 if (TREE_CODE (base) == ARRAY_REF)
899 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
900 return false;
901 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
902 return false;
905 return !abnormal_ssa_name_p (*index);
908 /* Returns true if EXPR contains a ssa name that occurs in an
909 abnormal phi node. */
911 static bool
912 contains_abnormal_ssa_name_p (tree expr)
914 enum tree_code code = TREE_CODE (expr);
915 enum tree_code_class class = TREE_CODE_CLASS (code);
917 if (code == SSA_NAME)
918 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
920 if (code == INTEGER_CST
921 || is_gimple_min_invariant (expr))
922 return false;
924 if (code == ADDR_EXPR)
925 return !for_each_index (&TREE_OPERAND (expr, 0),
926 idx_contains_abnormal_ssa_name_p,
927 NULL);
929 switch (class)
931 case tcc_binary:
932 case tcc_comparison:
933 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
934 return true;
936 /* Fallthru. */
937 case tcc_unary:
938 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
939 return true;
941 break;
943 default:
944 gcc_unreachable ();
947 return false;
950 /* Finds basic ivs. */
952 static bool
953 find_bivs (struct ivopts_data *data)
955 tree phi, step, type, base;
956 bool found = false;
957 struct loop *loop = data->current_loop;
959 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
961 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
962 continue;
964 step = determine_biv_step (phi);
966 if (!step)
967 continue;
968 if (cst_and_fits_in_hwi (step)
969 && int_cst_value (step) == 0)
970 continue;
972 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
973 if (contains_abnormal_ssa_name_p (base))
974 continue;
976 type = TREE_TYPE (PHI_RESULT (phi));
977 base = fold_convert (type, base);
978 step = fold_convert (type, step);
980 /* FIXME: We do not handle induction variables whose step does
981 not satisfy cst_and_fits_in_hwi. */
982 if (!cst_and_fits_in_hwi (step))
983 continue;
985 set_iv (data, PHI_RESULT (phi), base, step);
986 found = true;
989 return found;
992 /* Marks basic ivs. */
994 static void
995 mark_bivs (struct ivopts_data *data)
997 tree phi, var;
998 struct iv *iv, *incr_iv;
999 struct loop *loop = data->current_loop;
1000 basic_block incr_bb;
1002 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1004 iv = get_iv (data, PHI_RESULT (phi));
1005 if (!iv)
1006 continue;
1008 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1009 incr_iv = get_iv (data, var);
1010 if (!incr_iv)
1011 continue;
1013 /* If the increment is in the subloop, ignore it. */
1014 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1015 if (incr_bb->loop_father != data->current_loop
1016 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1017 continue;
1019 iv->biv_p = true;
1020 incr_iv->biv_p = true;
1024 /* Checks whether STMT defines a linear induction variable and stores its
1025 parameters to BASE and STEP. */
1027 static bool
1028 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1029 tree *base, tree *step)
1031 tree lhs;
1032 struct loop *loop = data->current_loop;
1034 *base = NULL_TREE;
1035 *step = NULL_TREE;
1037 if (TREE_CODE (stmt) != MODIFY_EXPR)
1038 return false;
1040 lhs = TREE_OPERAND (stmt, 0);
1041 if (TREE_CODE (lhs) != SSA_NAME)
1042 return false;
1044 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
1045 return false;
1047 /* FIXME: We do not handle induction variables whose step does
1048 not satisfy cst_and_fits_in_hwi. */
1049 if (!zero_p (*step)
1050 && !cst_and_fits_in_hwi (*step))
1051 return false;
1053 if (contains_abnormal_ssa_name_p (*base))
1054 return false;
1056 return true;
1059 /* Finds general ivs in statement STMT. */
1061 static void
1062 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1064 tree base, step;
1066 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1067 return;
1069 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1072 /* Finds general ivs in basic block BB. */
1074 static void
1075 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1077 block_stmt_iterator bsi;
1079 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1080 find_givs_in_stmt (data, bsi_stmt (bsi));
1083 /* Finds general ivs. */
1085 static void
1086 find_givs (struct ivopts_data *data)
1088 struct loop *loop = data->current_loop;
1089 basic_block *body = get_loop_body_in_dom_order (loop);
1090 unsigned i;
1092 for (i = 0; i < loop->num_nodes; i++)
1093 find_givs_in_bb (data, body[i]);
1094 free (body);
1097 /* For each ssa name defined in LOOP determines whether it is an induction
1098 variable and if so, its initial value and step. */
1100 static bool
1101 find_induction_variables (struct ivopts_data *data)
1103 unsigned i;
1104 bitmap_iterator bi;
1106 if (!find_bivs (data))
1107 return false;
1109 find_givs (data);
1110 mark_bivs (data);
1112 if (dump_file && (dump_flags & TDF_DETAILS))
1114 struct tree_niter_desc *niter;
1116 niter = niter_for_single_dom_exit (data);
1118 if (niter)
1120 fprintf (dump_file, " number of iterations ");
1121 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1122 fprintf (dump_file, "\n");
1124 fprintf (dump_file, " may be zero if ");
1125 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1126 fprintf (dump_file, "\n");
1127 fprintf (dump_file, "\n");
1130 fprintf (dump_file, "Induction variables:\n\n");
1132 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1134 if (ver_info (data, i)->iv)
1135 dump_iv (dump_file, ver_info (data, i)->iv);
1139 return true;
1142 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1144 static struct iv_use *
1145 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1146 tree stmt, enum use_type use_type)
1148 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1150 use->id = n_iv_uses (data);
1151 use->type = use_type;
1152 use->iv = iv;
1153 use->stmt = stmt;
1154 use->op_p = use_p;
1155 use->related_cands = BITMAP_ALLOC (NULL);
1157 /* To avoid showing ssa name in the dumps, if it was not reset by the
1158 caller. */
1159 iv->ssa_name = NULL_TREE;
1161 if (dump_file && (dump_flags & TDF_DETAILS))
1162 dump_use (dump_file, use);
1164 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
1166 return use;
1169 /* Checks whether OP is a loop-level invariant and if so, records it.
1170 NONLINEAR_USE is true if the invariant is used in a way we do not
1171 handle specially. */
1173 static void
1174 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1176 basic_block bb;
1177 struct version_info *info;
1179 if (TREE_CODE (op) != SSA_NAME
1180 || !is_gimple_reg (op))
1181 return;
1183 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1184 if (bb
1185 && flow_bb_inside_loop_p (data->current_loop, bb))
1186 return;
1188 info = name_info (data, op);
1189 info->name = op;
1190 info->has_nonlin_use |= nonlinear_use;
1191 if (!info->inv_id)
1192 info->inv_id = ++data->max_inv_id;
1193 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1196 /* Checks whether the use OP is interesting and if so, records it
1197 as TYPE. */
1199 static struct iv_use *
1200 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1201 enum use_type type)
1203 struct iv *iv;
1204 struct iv *civ;
1205 tree stmt;
1206 struct iv_use *use;
1208 if (TREE_CODE (op) != SSA_NAME)
1209 return NULL;
1211 iv = get_iv (data, op);
1212 if (!iv)
1213 return NULL;
1215 if (iv->have_use_for)
1217 use = iv_use (data, iv->use_id);
1219 gcc_assert (use->type == USE_NONLINEAR_EXPR
1220 || use->type == USE_OUTER);
1222 if (type == USE_NONLINEAR_EXPR)
1223 use->type = USE_NONLINEAR_EXPR;
1224 return use;
1227 if (zero_p (iv->step))
1229 record_invariant (data, op, true);
1230 return NULL;
1232 iv->have_use_for = true;
1234 civ = xmalloc (sizeof (struct iv));
1235 *civ = *iv;
1237 stmt = SSA_NAME_DEF_STMT (op);
1238 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1239 || TREE_CODE (stmt) == MODIFY_EXPR);
1241 use = record_use (data, NULL, civ, stmt, type);
1242 iv->use_id = use->id;
1244 return use;
1247 /* Checks whether the use OP is interesting and if so, records it. */
1249 static struct iv_use *
1250 find_interesting_uses_op (struct ivopts_data *data, tree op)
1252 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1255 /* Records a definition of induction variable OP that is used outside of the
1256 loop. */
1258 static struct iv_use *
1259 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1261 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1264 /* Checks whether the condition *COND_P in STMT is interesting
1265 and if so, records it. */
1267 static void
1268 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1270 tree *op0_p;
1271 tree *op1_p;
1272 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1273 struct iv const_iv;
1274 tree zero = integer_zero_node;
1276 const_iv.step = NULL_TREE;
1278 if (integer_zerop (*cond_p)
1279 || integer_nonzerop (*cond_p))
1280 return;
1282 if (TREE_CODE (*cond_p) == SSA_NAME)
1284 op0_p = cond_p;
1285 op1_p = &zero;
1287 else
1289 op0_p = &TREE_OPERAND (*cond_p, 0);
1290 op1_p = &TREE_OPERAND (*cond_p, 1);
1293 if (TREE_CODE (*op0_p) == SSA_NAME)
1294 iv0 = get_iv (data, *op0_p);
1295 else
1296 iv0 = &const_iv;
1298 if (TREE_CODE (*op1_p) == SSA_NAME)
1299 iv1 = get_iv (data, *op1_p);
1300 else
1301 iv1 = &const_iv;
1303 if (/* When comparing with non-invariant value, we may not do any senseful
1304 induction variable elimination. */
1305 (!iv0 || !iv1)
1306 /* Eliminating condition based on two ivs would be nontrivial.
1307 ??? TODO -- it is not really important to handle this case. */
1308 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1310 find_interesting_uses_op (data, *op0_p);
1311 find_interesting_uses_op (data, *op1_p);
1312 return;
1315 if (zero_p (iv0->step) && zero_p (iv1->step))
1317 /* If both are invariants, this is a work for unswitching. */
1318 return;
1321 civ = xmalloc (sizeof (struct iv));
1322 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1323 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1326 /* Returns true if expression EXPR is obviously invariant in LOOP,
1327 i.e. if all its operands are defined outside of the LOOP. */
1329 bool
1330 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1332 basic_block def_bb;
1333 unsigned i, len;
1335 if (is_gimple_min_invariant (expr))
1336 return true;
1338 if (TREE_CODE (expr) == SSA_NAME)
1340 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1341 if (def_bb
1342 && flow_bb_inside_loop_p (loop, def_bb))
1343 return false;
1345 return true;
1348 if (!EXPR_P (expr))
1349 return false;
1351 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1352 for (i = 0; i < len; i++)
1353 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1354 return false;
1356 return true;
1359 /* Cumulates the steps of indices into DATA and replaces their values with the
1360 initial ones. Returns false when the value of the index cannot be determined.
1361 Callback for for_each_index. */
1363 struct ifs_ivopts_data
1365 struct ivopts_data *ivopts_data;
1366 tree stmt;
1367 tree *step_p;
1370 static bool
1371 idx_find_step (tree base, tree *idx, void *data)
1373 struct ifs_ivopts_data *dta = data;
1374 struct iv *iv;
1375 tree step, type, iv_type, iv_step, lbound, off;
1376 struct loop *loop = dta->ivopts_data->current_loop;
1378 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1379 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1380 return false;
1382 /* If base is a component ref, require that the offset of the reference
1383 be invariant. */
1384 if (TREE_CODE (base) == COMPONENT_REF)
1386 off = component_ref_field_offset (base);
1387 return expr_invariant_in_loop_p (loop, off);
1390 /* If base is array, first check whether we will be able to move the
1391 reference out of the loop (in order to take its address in strength
1392 reduction). In order for this to work we need both lower bound
1393 and step to be loop invariants. */
1394 if (TREE_CODE (base) == ARRAY_REF)
1396 step = array_ref_element_size (base);
1397 lbound = array_ref_low_bound (base);
1399 if (!expr_invariant_in_loop_p (loop, step)
1400 || !expr_invariant_in_loop_p (loop, lbound))
1401 return false;
1404 if (TREE_CODE (*idx) != SSA_NAME)
1405 return true;
1407 iv = get_iv (dta->ivopts_data, *idx);
1408 if (!iv)
1409 return false;
1411 *idx = iv->base;
1413 if (!iv->step)
1414 return true;
1416 iv_type = TREE_TYPE (iv->base);
1417 type = build_pointer_type (TREE_TYPE (base));
1418 if (TREE_CODE (base) == ARRAY_REF)
1420 step = array_ref_element_size (base);
1422 /* We only handle addresses whose step is an integer constant. */
1423 if (TREE_CODE (step) != INTEGER_CST)
1424 return false;
1426 else
1427 /* The step for pointer arithmetics already is 1 byte. */
1428 step = build_int_cst (type, 1);
1430 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1431 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1432 type, iv->base, iv->step, dta->stmt);
1433 else
1434 iv_step = fold_convert (iv_type, iv->step);
1436 if (!iv_step)
1438 /* The index might wrap. */
1439 return false;
1442 step = fold_binary_to_constant (MULT_EXPR, type, step, iv_step);
1444 if (!*dta->step_p)
1445 *dta->step_p = step;
1446 else
1447 *dta->step_p = fold_binary_to_constant (PLUS_EXPR, type,
1448 *dta->step_p, step);
1450 return true;
1453 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1454 object is passed to it in DATA. */
1456 static bool
1457 idx_record_use (tree base, tree *idx,
1458 void *data)
1460 find_interesting_uses_op (data, *idx);
1461 if (TREE_CODE (base) == ARRAY_REF)
1463 find_interesting_uses_op (data, array_ref_element_size (base));
1464 find_interesting_uses_op (data, array_ref_low_bound (base));
1466 return true;
1469 /* Returns true if memory reference REF may be unaligned. */
1471 static bool
1472 may_be_unaligned_p (tree ref)
1474 tree base;
1475 tree base_type;
1476 HOST_WIDE_INT bitsize;
1477 HOST_WIDE_INT bitpos;
1478 tree toffset;
1479 enum machine_mode mode;
1480 int unsignedp, volatilep;
1481 unsigned base_align;
1483 /* The test below is basically copy of what expr.c:normal_inner_ref
1484 does to check whether the object must be loaded by parts when
1485 STRICT_ALIGNMENT is true. */
1486 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1487 &unsignedp, &volatilep, true);
1488 base_type = TREE_TYPE (base);
1489 base_align = TYPE_ALIGN (base_type);
1491 if (mode != BLKmode
1492 && (base_align < GET_MODE_ALIGNMENT (mode)
1493 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1494 || bitpos % BITS_PER_UNIT != 0))
1495 return true;
1497 return false;
1500 /* Finds addresses in *OP_P inside STMT. */
1502 static void
1503 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1505 tree base = unshare_expr (*op_p), step = NULL;
1506 struct iv *civ;
1507 struct ifs_ivopts_data ifs_ivopts_data;
1509 /* Ignore bitfields for now. Not really something terribly complicated
1510 to handle. TODO. */
1511 if (TREE_CODE (base) == COMPONENT_REF
1512 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1513 goto fail;
1515 if (STRICT_ALIGNMENT
1516 && may_be_unaligned_p (base))
1517 goto fail;
1519 ifs_ivopts_data.ivopts_data = data;
1520 ifs_ivopts_data.stmt = stmt;
1521 ifs_ivopts_data.step_p = &step;
1522 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1523 || zero_p (step))
1524 goto fail;
1526 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1527 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1529 if (TREE_CODE (base) == INDIRECT_REF)
1530 base = TREE_OPERAND (base, 0);
1531 else
1532 base = build_addr (base);
1534 civ = alloc_iv (base, step);
1535 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1536 return;
1538 fail:
1539 for_each_index (op_p, idx_record_use, data);
1542 /* Finds and records invariants used in STMT. */
1544 static void
1545 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1547 use_optype uses = NULL;
1548 unsigned i, n;
1549 tree op;
1551 if (TREE_CODE (stmt) == PHI_NODE)
1552 n = PHI_NUM_ARGS (stmt);
1553 else
1555 get_stmt_operands (stmt);
1556 uses = STMT_USE_OPS (stmt);
1557 n = NUM_USES (uses);
1560 for (i = 0; i < n; i++)
1562 if (TREE_CODE (stmt) == PHI_NODE)
1563 op = PHI_ARG_DEF (stmt, i);
1564 else
1565 op = USE_OP (uses, i);
1567 record_invariant (data, op, false);
1571 /* Finds interesting uses of induction variables in the statement STMT. */
1573 static void
1574 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1576 struct iv *iv;
1577 tree op, lhs, rhs;
1578 use_optype uses = NULL;
1579 unsigned i, n;
1581 find_invariants_stmt (data, stmt);
1583 if (TREE_CODE (stmt) == COND_EXPR)
1585 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1586 return;
1589 if (TREE_CODE (stmt) == MODIFY_EXPR)
1591 lhs = TREE_OPERAND (stmt, 0);
1592 rhs = TREE_OPERAND (stmt, 1);
1594 if (TREE_CODE (lhs) == SSA_NAME)
1596 /* If the statement defines an induction variable, the uses are not
1597 interesting by themselves. */
1599 iv = get_iv (data, lhs);
1601 if (iv && !zero_p (iv->step))
1602 return;
1605 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1607 case tcc_comparison:
1608 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1609 return;
1611 case tcc_reference:
1612 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1613 if (REFERENCE_CLASS_P (lhs))
1614 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1615 return;
1617 default: ;
1620 if (REFERENCE_CLASS_P (lhs)
1621 && is_gimple_val (rhs))
1623 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1624 find_interesting_uses_op (data, rhs);
1625 return;
1628 /* TODO -- we should also handle address uses of type
1630 memory = call (whatever);
1634 call (memory). */
1637 if (TREE_CODE (stmt) == PHI_NODE
1638 && bb_for_stmt (stmt) == data->current_loop->header)
1640 lhs = PHI_RESULT (stmt);
1641 iv = get_iv (data, lhs);
1643 if (iv && !zero_p (iv->step))
1644 return;
1647 if (TREE_CODE (stmt) == PHI_NODE)
1648 n = PHI_NUM_ARGS (stmt);
1649 else
1651 uses = STMT_USE_OPS (stmt);
1652 n = NUM_USES (uses);
1655 for (i = 0; i < n; i++)
1657 if (TREE_CODE (stmt) == PHI_NODE)
1658 op = PHI_ARG_DEF (stmt, i);
1659 else
1660 op = USE_OP (uses, i);
1662 if (TREE_CODE (op) != SSA_NAME)
1663 continue;
1665 iv = get_iv (data, op);
1666 if (!iv)
1667 continue;
1669 find_interesting_uses_op (data, op);
1673 /* Finds interesting uses of induction variables outside of loops
1674 on loop exit edge EXIT. */
1676 static void
1677 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1679 tree phi, def;
1681 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1683 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1684 find_interesting_uses_outer (data, def);
1688 /* Finds uses of the induction variables that are interesting. */
1690 static void
1691 find_interesting_uses (struct ivopts_data *data)
1693 basic_block bb;
1694 block_stmt_iterator bsi;
1695 tree phi;
1696 basic_block *body = get_loop_body (data->current_loop);
1697 unsigned i;
1698 struct version_info *info;
1699 edge e;
1701 if (dump_file && (dump_flags & TDF_DETAILS))
1702 fprintf (dump_file, "Uses:\n\n");
1704 for (i = 0; i < data->current_loop->num_nodes; i++)
1706 edge_iterator ei;
1707 bb = body[i];
1709 FOR_EACH_EDGE (e, ei, bb->succs)
1710 if (e->dest != EXIT_BLOCK_PTR
1711 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1712 find_interesting_uses_outside (data, e);
1714 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1715 find_interesting_uses_stmt (data, phi);
1716 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1717 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1720 if (dump_file && (dump_flags & TDF_DETAILS))
1722 bitmap_iterator bi;
1724 fprintf (dump_file, "\n");
1726 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1728 info = ver_info (data, i);
1729 if (info->inv_id)
1731 fprintf (dump_file, " ");
1732 print_generic_expr (dump_file, info->name, TDF_SLIM);
1733 fprintf (dump_file, " is invariant (%d)%s\n",
1734 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1738 fprintf (dump_file, "\n");
1741 free (body);
1744 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1745 is true, assume we are inside an address. */
1747 static tree
1748 strip_offset (tree expr, bool inside_addr, unsigned HOST_WIDE_INT *offset)
1750 tree op0 = NULL_TREE, op1 = NULL_TREE, step;
1751 enum tree_code code;
1752 tree type, orig_type = TREE_TYPE (expr);
1753 unsigned HOST_WIDE_INT off0, off1, st;
1754 tree orig_expr = expr;
1756 STRIP_NOPS (expr);
1757 type = TREE_TYPE (expr);
1758 code = TREE_CODE (expr);
1759 *offset = 0;
1761 switch (code)
1763 case INTEGER_CST:
1764 if (!cst_and_fits_in_hwi (expr)
1765 || zero_p (expr))
1766 return orig_expr;
1768 *offset = int_cst_value (expr);
1769 return build_int_cst_type (orig_type, 0);
1771 case PLUS_EXPR:
1772 case MINUS_EXPR:
1773 op0 = TREE_OPERAND (expr, 0);
1774 op1 = TREE_OPERAND (expr, 1);
1776 op0 = strip_offset (op0, false, &off0);
1777 op1 = strip_offset (op1, false, &off1);
1779 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1780 if (op0 == TREE_OPERAND (expr, 0)
1781 && op1 == TREE_OPERAND (expr, 1))
1782 return orig_expr;
1784 if (zero_p (op1))
1785 expr = op0;
1786 else if (zero_p (op0))
1788 if (code == PLUS_EXPR)
1789 expr = op1;
1790 else
1791 expr = build1 (NEGATE_EXPR, type, op1);
1793 else
1794 expr = build2 (code, type, op0, op1);
1796 return fold_convert (orig_type, expr);
1798 case ARRAY_REF:
1799 if (!inside_addr)
1800 return orig_expr;
1802 step = array_ref_element_size (expr);
1803 if (!cst_and_fits_in_hwi (step))
1804 break;
1806 st = int_cst_value (step);
1807 op1 = TREE_OPERAND (expr, 1);
1808 op1 = strip_offset (op1, false, &off1);
1809 *offset = off1 * st;
1810 break;
1812 case COMPONENT_REF:
1813 if (!inside_addr)
1814 return orig_expr;
1815 break;
1817 case ADDR_EXPR:
1818 inside_addr = true;
1819 break;
1821 default:
1822 return orig_expr;
1825 /* Default handling of expressions for that we want to recurse into
1826 the first operand. */
1827 op0 = TREE_OPERAND (expr, 0);
1828 op0 = strip_offset (op0, inside_addr, &off0);
1829 *offset += off0;
1831 if (op0 == TREE_OPERAND (expr, 0)
1832 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1833 return orig_expr;
1835 expr = copy_node (expr);
1836 TREE_OPERAND (expr, 0) = op0;
1837 if (op1)
1838 TREE_OPERAND (expr, 1) = op1;
1840 return fold_convert (orig_type, expr);
1843 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1844 position to POS. If USE is not NULL, the candidate is set as related to
1845 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1846 replacement of the final value of the iv by a direct computation. */
1848 static struct iv_cand *
1849 add_candidate_1 (struct ivopts_data *data,
1850 tree base, tree step, bool important, enum iv_position pos,
1851 struct iv_use *use, tree incremented_at)
1853 unsigned i;
1854 struct iv_cand *cand = NULL;
1855 tree type;
1857 if (base)
1859 type = TREE_TYPE (base);
1860 if (!TYPE_UNSIGNED (type))
1862 type = unsigned_type_for (type);
1863 base = fold_convert (type, base);
1864 if (step)
1865 step = fold_convert (type, step);
1869 for (i = 0; i < n_iv_cands (data); i++)
1871 cand = iv_cand (data, i);
1873 if (cand->pos != pos)
1874 continue;
1876 if (cand->incremented_at != incremented_at)
1877 continue;
1879 if (!cand->iv)
1881 if (!base && !step)
1882 break;
1884 continue;
1887 if (!base && !step)
1888 continue;
1890 if (!operand_equal_p (base, cand->iv->base, 0))
1891 continue;
1893 if (zero_p (cand->iv->step))
1895 if (zero_p (step))
1896 break;
1898 else
1900 if (step && operand_equal_p (step, cand->iv->step, 0))
1901 break;
1905 if (i == n_iv_cands (data))
1907 cand = xcalloc (1, sizeof (struct iv_cand));
1908 cand->id = i;
1910 if (!base && !step)
1911 cand->iv = NULL;
1912 else
1913 cand->iv = alloc_iv (base, step);
1915 cand->pos = pos;
1916 if (pos != IP_ORIGINAL && cand->iv)
1918 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1919 cand->var_after = cand->var_before;
1921 cand->important = important;
1922 cand->incremented_at = incremented_at;
1923 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1925 if (dump_file && (dump_flags & TDF_DETAILS))
1926 dump_cand (dump_file, cand);
1929 if (important && !cand->important)
1931 cand->important = true;
1932 if (dump_file && (dump_flags & TDF_DETAILS))
1933 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1936 if (use)
1938 bitmap_set_bit (use->related_cands, i);
1939 if (dump_file && (dump_flags & TDF_DETAILS))
1940 fprintf (dump_file, "Candidate %d is related to use %d\n",
1941 cand->id, use->id);
1944 return cand;
1947 /* Returns true if incrementing the induction variable at the end of the LOOP
1948 is allowed.
1950 The purpose is to avoid splitting latch edge with a biv increment, thus
1951 creating a jump, possibly confusing other optimization passes and leaving
1952 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
1953 is not available (so we do not have a better alternative), or if the latch
1954 edge is already nonempty. */
1956 static bool
1957 allow_ip_end_pos_p (struct loop *loop)
1959 if (!ip_normal_pos (loop))
1960 return true;
1962 if (!empty_block_p (ip_end_pos (loop)))
1963 return true;
1965 return false;
1968 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1969 position to POS. If USE is not NULL, the candidate is set as related to
1970 it. The candidate computation is scheduled on all available positions. */
1972 static void
1973 add_candidate (struct ivopts_data *data,
1974 tree base, tree step, bool important, struct iv_use *use)
1976 if (ip_normal_pos (data->current_loop))
1977 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
1978 if (ip_end_pos (data->current_loop)
1979 && allow_ip_end_pos_p (data->current_loop))
1980 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
1983 /* Adds standard iv candidates. */
1985 static void
1986 add_standard_iv_candidates (struct ivopts_data *data)
1988 /* Add 0 + 1 * iteration candidate. */
1989 add_candidate (data,
1990 build_int_cst (unsigned_intSI_type_node, 0),
1991 build_int_cst (unsigned_intSI_type_node, 1),
1992 true, NULL);
1994 /* The same for a long type if it is still fast enough. */
1995 if (BITS_PER_WORD > 32)
1996 add_candidate (data,
1997 build_int_cst (unsigned_intDI_type_node, 0),
1998 build_int_cst (unsigned_intDI_type_node, 1),
1999 true, NULL);
2003 /* Adds candidates bases on the old induction variable IV. */
2005 static void
2006 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2008 tree phi, def;
2009 struct iv_cand *cand;
2011 add_candidate (data, iv->base, iv->step, true, NULL);
2013 /* The same, but with initial value zero. */
2014 add_candidate (data,
2015 build_int_cst (TREE_TYPE (iv->base), 0),
2016 iv->step, true, NULL);
2018 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2019 if (TREE_CODE (phi) == PHI_NODE)
2021 /* Additionally record the possibility of leaving the original iv
2022 untouched. */
2023 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2024 cand = add_candidate_1 (data,
2025 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2026 SSA_NAME_DEF_STMT (def));
2027 cand->var_before = iv->ssa_name;
2028 cand->var_after = def;
2032 /* Adds candidates based on the old induction variables. */
2034 static void
2035 add_old_ivs_candidates (struct ivopts_data *data)
2037 unsigned i;
2038 struct iv *iv;
2039 bitmap_iterator bi;
2041 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2043 iv = ver_info (data, i)->iv;
2044 if (iv && iv->biv_p && !zero_p (iv->step))
2045 add_old_iv_candidates (data, iv);
2049 /* Adds candidates based on the value of the induction variable IV and USE. */
2051 static void
2052 add_iv_value_candidates (struct ivopts_data *data,
2053 struct iv *iv, struct iv_use *use)
2055 add_candidate (data, iv->base, iv->step, false, use);
2057 /* The same, but with initial value zero. */
2058 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2059 iv->step, false, use);
2062 /* Adds candidates based on the address IV and USE. */
2064 static void
2065 add_address_candidates (struct ivopts_data *data,
2066 struct iv *iv, struct iv_use *use)
2068 tree base, abase;
2069 unsigned HOST_WIDE_INT offset;
2071 /* First, the trivial choices. */
2072 add_iv_value_candidates (data, iv, use);
2074 /* Second, try removing the COMPONENT_REFs. */
2075 if (TREE_CODE (iv->base) == ADDR_EXPR)
2077 base = TREE_OPERAND (iv->base, 0);
2078 while (TREE_CODE (base) == COMPONENT_REF
2079 || (TREE_CODE (base) == ARRAY_REF
2080 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
2081 base = TREE_OPERAND (base, 0);
2083 if (base != TREE_OPERAND (iv->base, 0))
2085 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
2086 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
2088 if (TREE_CODE (base) == INDIRECT_REF)
2089 base = TREE_OPERAND (base, 0);
2090 else
2091 base = build_addr (base);
2092 add_candidate (data, base, iv->step, false, use);
2096 /* Third, try removing the constant offset. */
2097 abase = iv->base;
2098 base = strip_offset (abase, false, &offset);
2099 if (offset)
2100 add_candidate (data, base, iv->step, false, use);
2103 /* Possibly adds pseudocandidate for replacing the final value of USE by
2104 a direct computation. */
2106 static void
2107 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2109 struct tree_niter_desc *niter;
2111 /* We must know where we exit the loop and how many times does it roll. */
2112 niter = niter_for_single_dom_exit (data);
2113 if (!niter
2114 || !zero_p (niter->may_be_zero))
2115 return;
2117 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2120 /* Adds candidates based on the uses. */
2122 static void
2123 add_derived_ivs_candidates (struct ivopts_data *data)
2125 unsigned i;
2127 for (i = 0; i < n_iv_uses (data); i++)
2129 struct iv_use *use = iv_use (data, i);
2131 if (!use)
2132 continue;
2134 switch (use->type)
2136 case USE_NONLINEAR_EXPR:
2137 case USE_COMPARE:
2138 /* Just add the ivs based on the value of the iv used here. */
2139 add_iv_value_candidates (data, use->iv, use);
2140 break;
2142 case USE_OUTER:
2143 add_iv_value_candidates (data, use->iv, use);
2145 /* Additionally, add the pseudocandidate for the possibility to
2146 replace the final value by a direct computation. */
2147 add_iv_outer_candidates (data, use);
2148 break;
2150 case USE_ADDRESS:
2151 add_address_candidates (data, use->iv, use);
2152 break;
2154 default:
2155 gcc_unreachable ();
2160 /* Record important candidates and add them to related_cands bitmaps
2161 if needed. */
2163 static void
2164 record_important_candidates (struct ivopts_data *data)
2166 unsigned i;
2167 struct iv_use *use;
2169 for (i = 0; i < n_iv_cands (data); i++)
2171 struct iv_cand *cand = iv_cand (data, i);
2173 if (cand->important)
2174 bitmap_set_bit (data->important_candidates, i);
2177 data->consider_all_candidates = (n_iv_cands (data)
2178 <= CONSIDER_ALL_CANDIDATES_BOUND);
2180 if (data->consider_all_candidates)
2182 /* We will not need "related_cands" bitmaps in this case,
2183 so release them to decrease peak memory consumption. */
2184 for (i = 0; i < n_iv_uses (data); i++)
2186 use = iv_use (data, i);
2187 BITMAP_FREE (use->related_cands);
2190 else
2192 /* Add important candidates to the related_cands bitmaps. */
2193 for (i = 0; i < n_iv_uses (data); i++)
2194 bitmap_ior_into (iv_use (data, i)->related_cands,
2195 data->important_candidates);
2199 /* Finds the candidates for the induction variables. */
2201 static void
2202 find_iv_candidates (struct ivopts_data *data)
2204 /* Add commonly used ivs. */
2205 add_standard_iv_candidates (data);
2207 /* Add old induction variables. */
2208 add_old_ivs_candidates (data);
2210 /* Add induction variables derived from uses. */
2211 add_derived_ivs_candidates (data);
2213 /* Record the important candidates. */
2214 record_important_candidates (data);
2217 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2218 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2219 we allocate a simple list to every use. */
2221 static void
2222 alloc_use_cost_map (struct ivopts_data *data)
2224 unsigned i, size, s, j;
2226 for (i = 0; i < n_iv_uses (data); i++)
2228 struct iv_use *use = iv_use (data, i);
2229 bitmap_iterator bi;
2231 if (data->consider_all_candidates)
2232 size = n_iv_cands (data);
2233 else
2235 s = 0;
2236 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2238 s++;
2241 /* Round up to the power of two, so that moduling by it is fast. */
2242 for (size = 1; size < s; size <<= 1)
2243 continue;
2246 use->n_map_members = size;
2247 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2251 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2252 on invariants DEPENDS_ON. */
2254 static void
2255 set_use_iv_cost (struct ivopts_data *data,
2256 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2257 bitmap depends_on)
2259 unsigned i, s;
2261 if (cost == INFTY)
2263 BITMAP_FREE (depends_on);
2264 return;
2267 if (data->consider_all_candidates)
2269 use->cost_map[cand->id].cand = cand;
2270 use->cost_map[cand->id].cost = cost;
2271 use->cost_map[cand->id].depends_on = depends_on;
2272 return;
2275 /* n_map_members is a power of two, so this computes modulo. */
2276 s = cand->id & (use->n_map_members - 1);
2277 for (i = s; i < use->n_map_members; i++)
2278 if (!use->cost_map[i].cand)
2279 goto found;
2280 for (i = 0; i < s; i++)
2281 if (!use->cost_map[i].cand)
2282 goto found;
2284 gcc_unreachable ();
2286 found:
2287 use->cost_map[i].cand = cand;
2288 use->cost_map[i].cost = cost;
2289 use->cost_map[i].depends_on = depends_on;
2292 /* Gets cost of (USE, CANDIDATE) pair. */
2294 static struct cost_pair *
2295 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2296 struct iv_cand *cand)
2298 unsigned i, s;
2299 struct cost_pair *ret;
2301 if (!cand)
2302 return NULL;
2304 if (data->consider_all_candidates)
2306 ret = use->cost_map + cand->id;
2307 if (!ret->cand)
2308 return NULL;
2310 return ret;
2313 /* n_map_members is a power of two, so this computes modulo. */
2314 s = cand->id & (use->n_map_members - 1);
2315 for (i = s; i < use->n_map_members; i++)
2316 if (use->cost_map[i].cand == cand)
2317 return use->cost_map + i;
2319 for (i = 0; i < s; i++)
2320 if (use->cost_map[i].cand == cand)
2321 return use->cost_map + i;
2323 return NULL;
2326 /* Returns estimate on cost of computing SEQ. */
2328 static unsigned
2329 seq_cost (rtx seq)
2331 unsigned cost = 0;
2332 rtx set;
2334 for (; seq; seq = NEXT_INSN (seq))
2336 set = single_set (seq);
2337 if (set)
2338 cost += rtx_cost (set, SET);
2339 else
2340 cost++;
2343 return cost;
2346 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2347 static rtx
2348 produce_memory_decl_rtl (tree obj, int *regno)
2350 rtx x;
2351 if (!obj)
2352 abort ();
2353 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2355 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2356 x = gen_rtx_SYMBOL_REF (Pmode, name);
2358 else
2359 x = gen_raw_REG (Pmode, (*regno)++);
2361 return gen_rtx_MEM (DECL_MODE (obj), x);
2364 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2365 walk_tree. DATA contains the actual fake register number. */
2367 static tree
2368 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2370 tree obj = NULL_TREE;
2371 rtx x = NULL_RTX;
2372 int *regno = data;
2374 switch (TREE_CODE (*expr_p))
2376 case ADDR_EXPR:
2377 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2378 handled_component_p (*expr_p);
2379 expr_p = &TREE_OPERAND (*expr_p, 0))
2380 continue;
2381 obj = *expr_p;
2382 if (DECL_P (obj))
2383 x = produce_memory_decl_rtl (obj, regno);
2384 break;
2386 case SSA_NAME:
2387 *ws = 0;
2388 obj = SSA_NAME_VAR (*expr_p);
2389 if (!DECL_RTL_SET_P (obj))
2390 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2391 break;
2393 case VAR_DECL:
2394 case PARM_DECL:
2395 case RESULT_DECL:
2396 *ws = 0;
2397 obj = *expr_p;
2399 if (DECL_RTL_SET_P (obj))
2400 break;
2402 if (DECL_MODE (obj) == BLKmode)
2403 x = produce_memory_decl_rtl (obj, regno);
2404 else
2405 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2407 break;
2409 default:
2410 break;
2413 if (x)
2415 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
2416 SET_DECL_RTL (obj, x);
2419 return NULL_TREE;
2422 /* Determines cost of the computation of EXPR. */
2424 static unsigned
2425 computation_cost (tree expr)
2427 rtx seq, rslt;
2428 tree type = TREE_TYPE (expr);
2429 unsigned cost;
2430 int regno = 0;
2432 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2433 start_sequence ();
2434 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2435 seq = get_insns ();
2436 end_sequence ();
2438 cost = seq_cost (seq);
2439 if (GET_CODE (rslt) == MEM)
2440 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2442 return cost;
2445 /* Returns variable containing the value of candidate CAND at statement AT. */
2447 static tree
2448 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2450 if (stmt_after_increment (loop, cand, stmt))
2451 return cand->var_after;
2452 else
2453 return cand->var_before;
2456 /* Determines the expression by that USE is expressed from induction variable
2457 CAND at statement AT in LOOP. */
2459 static tree
2460 get_computation_at (struct loop *loop,
2461 struct iv_use *use, struct iv_cand *cand, tree at)
2463 tree ubase = use->iv->base;
2464 tree ustep = use->iv->step;
2465 tree cbase = cand->iv->base;
2466 tree cstep = cand->iv->step;
2467 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2468 tree uutype;
2469 tree expr, delta;
2470 tree ratio;
2471 unsigned HOST_WIDE_INT ustepi, cstepi;
2472 HOST_WIDE_INT ratioi;
2474 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2476 /* We do not have a precision to express the values of use. */
2477 return NULL_TREE;
2480 expr = var_at_stmt (loop, cand, at);
2482 if (TREE_TYPE (expr) != ctype)
2484 /* This may happen with the original ivs. */
2485 expr = fold_convert (ctype, expr);
2488 if (TYPE_UNSIGNED (utype))
2489 uutype = utype;
2490 else
2492 uutype = unsigned_type_for (utype);
2493 ubase = fold_convert (uutype, ubase);
2494 ustep = fold_convert (uutype, ustep);
2497 if (uutype != ctype)
2499 expr = fold_convert (uutype, expr);
2500 cbase = fold_convert (uutype, cbase);
2501 cstep = fold_convert (uutype, cstep);
2504 if (!cst_and_fits_in_hwi (cstep)
2505 || !cst_and_fits_in_hwi (ustep))
2506 return NULL_TREE;
2508 ustepi = int_cst_value (ustep);
2509 cstepi = int_cst_value (cstep);
2511 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2513 /* TODO maybe consider case when ustep divides cstep and the ratio is
2514 a power of 2 (so that the division is fast to execute)? We would
2515 need to be much more careful with overflows etc. then. */
2516 return NULL_TREE;
2519 /* We may need to shift the value if we are after the increment. */
2520 if (stmt_after_increment (loop, cand, at))
2521 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2523 /* use = ubase - ratio * cbase + ratio * var.
2525 In general case ubase + ratio * (var - cbase) could be better (one less
2526 multiplication), but often it is possible to eliminate redundant parts
2527 of computations from (ubase - ratio * cbase) term, and if it does not
2528 happen, fold is able to apply the distributive law to obtain this form
2529 anyway. */
2531 if (ratioi == 1)
2533 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2534 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2536 else if (ratioi == -1)
2538 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2539 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2541 else
2543 ratio = build_int_cst_type (uutype, ratioi);
2544 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2545 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2546 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2547 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2550 return fold_convert (utype, expr);
2553 /* Determines the expression by that USE is expressed from induction variable
2554 CAND in LOOP. */
2556 static tree
2557 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2559 return get_computation_at (loop, use, cand, use->stmt);
2562 /* Returns cost of addition in MODE. */
2564 static unsigned
2565 add_cost (enum machine_mode mode)
2567 static unsigned costs[NUM_MACHINE_MODES];
2568 rtx seq;
2569 unsigned cost;
2571 if (costs[mode])
2572 return costs[mode];
2574 start_sequence ();
2575 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2576 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2577 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2578 NULL_RTX);
2579 seq = get_insns ();
2580 end_sequence ();
2582 cost = seq_cost (seq);
2583 if (!cost)
2584 cost = 1;
2586 costs[mode] = cost;
2588 if (dump_file && (dump_flags & TDF_DETAILS))
2589 fprintf (dump_file, "Addition in %s costs %d\n",
2590 GET_MODE_NAME (mode), cost);
2591 return cost;
2594 /* Entry in a hashtable of already known costs for multiplication. */
2595 struct mbc_entry
2597 HOST_WIDE_INT cst; /* The constant to multiply by. */
2598 enum machine_mode mode; /* In mode. */
2599 unsigned cost; /* The cost. */
2602 /* Counts hash value for the ENTRY. */
2604 static hashval_t
2605 mbc_entry_hash (const void *entry)
2607 const struct mbc_entry *e = entry;
2609 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2612 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2614 static int
2615 mbc_entry_eq (const void *entry1, const void *entry2)
2617 const struct mbc_entry *e1 = entry1;
2618 const struct mbc_entry *e2 = entry2;
2620 return (e1->mode == e2->mode
2621 && e1->cst == e2->cst);
2624 /* Returns cost of multiplication by constant CST in MODE. */
2626 static unsigned
2627 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2629 static htab_t costs;
2630 struct mbc_entry **cached, act;
2631 rtx seq;
2632 unsigned cost;
2634 if (!costs)
2635 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2637 act.mode = mode;
2638 act.cst = cst;
2639 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2640 if (*cached)
2641 return (*cached)->cost;
2643 *cached = xmalloc (sizeof (struct mbc_entry));
2644 (*cached)->mode = mode;
2645 (*cached)->cst = cst;
2647 start_sequence ();
2648 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2649 NULL_RTX, 0);
2650 seq = get_insns ();
2651 end_sequence ();
2653 cost = seq_cost (seq);
2655 if (dump_file && (dump_flags & TDF_DETAILS))
2656 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2657 (int) cst, GET_MODE_NAME (mode), cost);
2659 (*cached)->cost = cost;
2661 return cost;
2664 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2665 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2666 variable is omitted. The created memory accesses MODE.
2668 TODO -- there must be some better way. This all is quite crude. */
2670 static unsigned
2671 get_address_cost (bool symbol_present, bool var_present,
2672 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2674 #define MAX_RATIO 128
2675 static sbitmap valid_mult;
2676 static HOST_WIDE_INT rat, off;
2677 static HOST_WIDE_INT min_offset, max_offset;
2678 static unsigned costs[2][2][2][2];
2679 unsigned cost, acost;
2680 rtx seq, addr, base;
2681 bool offset_p, ratio_p;
2682 rtx reg1;
2683 HOST_WIDE_INT s_offset;
2684 unsigned HOST_WIDE_INT mask;
2685 unsigned bits;
2687 if (!valid_mult)
2689 HOST_WIDE_INT i;
2691 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2693 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2694 for (i = 1; i <= 1 << 20; i <<= 1)
2696 XEXP (addr, 1) = GEN_INT (i);
2697 if (!memory_address_p (Pmode, addr))
2698 break;
2700 max_offset = i >> 1;
2701 off = max_offset;
2703 for (i = 1; i <= 1 << 20; i <<= 1)
2705 XEXP (addr, 1) = GEN_INT (-i);
2706 if (!memory_address_p (Pmode, addr))
2707 break;
2709 min_offset = -(i >> 1);
2711 if (dump_file && (dump_flags & TDF_DETAILS))
2713 fprintf (dump_file, "get_address_cost:\n");
2714 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2715 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2718 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2719 sbitmap_zero (valid_mult);
2720 rat = 1;
2721 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2722 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2724 XEXP (addr, 1) = GEN_INT (i);
2725 if (memory_address_p (Pmode, addr))
2727 SET_BIT (valid_mult, i + MAX_RATIO);
2728 rat = i;
2732 if (dump_file && (dump_flags & TDF_DETAILS))
2734 fprintf (dump_file, " allowed multipliers:");
2735 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2736 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2737 fprintf (dump_file, " %d", (int) i);
2738 fprintf (dump_file, "\n");
2739 fprintf (dump_file, "\n");
2743 bits = GET_MODE_BITSIZE (Pmode);
2744 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2745 offset &= mask;
2746 if ((offset >> (bits - 1) & 1))
2747 offset |= ~mask;
2748 s_offset = offset;
2750 cost = 0;
2751 offset_p = (s_offset != 0
2752 && min_offset <= s_offset && s_offset <= max_offset);
2753 ratio_p = (ratio != 1
2754 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2755 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2757 if (ratio != 1 && !ratio_p)
2758 cost += multiply_by_cost (ratio, Pmode);
2760 if (s_offset && !offset_p && !symbol_present)
2762 cost += add_cost (Pmode);
2763 var_present = true;
2766 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2767 if (!acost)
2769 acost = 0;
2771 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2772 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2773 if (ratio_p)
2774 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2776 if (var_present)
2777 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2779 if (symbol_present)
2781 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2782 if (offset_p)
2783 base = gen_rtx_fmt_e (CONST, Pmode,
2784 gen_rtx_fmt_ee (PLUS, Pmode,
2785 base,
2786 GEN_INT (off)));
2788 else if (offset_p)
2789 base = GEN_INT (off);
2790 else
2791 base = NULL_RTX;
2793 if (base)
2794 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2796 start_sequence ();
2797 addr = memory_address (Pmode, addr);
2798 seq = get_insns ();
2799 end_sequence ();
2801 acost = seq_cost (seq);
2802 acost += address_cost (addr, Pmode);
2804 if (!acost)
2805 acost = 1;
2806 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2809 return cost + acost;
2812 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2813 the bitmap to that we should store it. */
2815 static struct ivopts_data *fd_ivopts_data;
2816 static tree
2817 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2819 bitmap *depends_on = data;
2820 struct version_info *info;
2822 if (TREE_CODE (*expr_p) != SSA_NAME)
2823 return NULL_TREE;
2824 info = name_info (fd_ivopts_data, *expr_p);
2826 if (!info->inv_id || info->has_nonlin_use)
2827 return NULL_TREE;
2829 if (!*depends_on)
2830 *depends_on = BITMAP_ALLOC (NULL);
2831 bitmap_set_bit (*depends_on, info->inv_id);
2833 return NULL_TREE;
2836 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
2837 invariants the computation depends on. */
2839 static unsigned
2840 force_var_cost (struct ivopts_data *data,
2841 tree expr, bitmap *depends_on)
2843 static bool costs_initialized = false;
2844 static unsigned integer_cost;
2845 static unsigned symbol_cost;
2846 static unsigned address_cost;
2847 tree op0, op1;
2848 unsigned cost0, cost1, cost;
2849 enum machine_mode mode;
2851 if (!costs_initialized)
2853 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2854 rtx x = gen_rtx_MEM (DECL_MODE (var),
2855 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2856 tree addr;
2857 tree type = build_pointer_type (integer_type_node);
2859 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2860 2000));
2862 SET_DECL_RTL (var, x);
2863 TREE_STATIC (var) = 1;
2864 addr = build1 (ADDR_EXPR, type, var);
2865 symbol_cost = computation_cost (addr) + 1;
2867 address_cost
2868 = computation_cost (build2 (PLUS_EXPR, type,
2869 addr,
2870 build_int_cst_type (type, 2000))) + 1;
2871 if (dump_file && (dump_flags & TDF_DETAILS))
2873 fprintf (dump_file, "force_var_cost:\n");
2874 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2875 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2876 fprintf (dump_file, " address %d\n", (int) address_cost);
2877 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2878 fprintf (dump_file, "\n");
2881 costs_initialized = true;
2884 STRIP_NOPS (expr);
2886 if (depends_on)
2888 fd_ivopts_data = data;
2889 walk_tree (&expr, find_depends, depends_on, NULL);
2892 if (SSA_VAR_P (expr))
2893 return 0;
2895 if (TREE_INVARIANT (expr))
2897 if (TREE_CODE (expr) == INTEGER_CST)
2898 return integer_cost;
2900 if (TREE_CODE (expr) == ADDR_EXPR)
2902 tree obj = TREE_OPERAND (expr, 0);
2904 if (TREE_CODE (obj) == VAR_DECL
2905 || TREE_CODE (obj) == PARM_DECL
2906 || TREE_CODE (obj) == RESULT_DECL)
2907 return symbol_cost;
2910 return address_cost;
2913 switch (TREE_CODE (expr))
2915 case PLUS_EXPR:
2916 case MINUS_EXPR:
2917 case MULT_EXPR:
2918 op0 = TREE_OPERAND (expr, 0);
2919 op1 = TREE_OPERAND (expr, 1);
2920 STRIP_NOPS (op0);
2921 STRIP_NOPS (op1);
2923 if (is_gimple_val (op0))
2924 cost0 = 0;
2925 else
2926 cost0 = force_var_cost (data, op0, NULL);
2928 if (is_gimple_val (op1))
2929 cost1 = 0;
2930 else
2931 cost1 = force_var_cost (data, op1, NULL);
2933 break;
2935 default:
2936 /* Just an arbitrary value, FIXME. */
2937 return target_spill_cost;
2940 mode = TYPE_MODE (TREE_TYPE (expr));
2941 switch (TREE_CODE (expr))
2943 case PLUS_EXPR:
2944 case MINUS_EXPR:
2945 cost = add_cost (mode);
2946 break;
2948 case MULT_EXPR:
2949 if (cst_and_fits_in_hwi (op0))
2950 cost = multiply_by_cost (int_cst_value (op0), mode);
2951 else if (cst_and_fits_in_hwi (op1))
2952 cost = multiply_by_cost (int_cst_value (op1), mode);
2953 else
2954 return target_spill_cost;
2955 break;
2957 default:
2958 gcc_unreachable ();
2961 cost += cost0;
2962 cost += cost1;
2964 /* Bound the cost by target_spill_cost. The parts of complicated
2965 computations often are either loop invariant or at least can
2966 be shared between several iv uses, so letting this grow without
2967 limits would not give reasonable results. */
2968 return cost < target_spill_cost ? cost : target_spill_cost;
2971 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2972 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2973 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2974 invariants the computation depends on. */
2976 static unsigned
2977 split_address_cost (struct ivopts_data *data,
2978 tree addr, bool *symbol_present, bool *var_present,
2979 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2981 tree core;
2982 HOST_WIDE_INT bitsize;
2983 HOST_WIDE_INT bitpos;
2984 tree toffset;
2985 enum machine_mode mode;
2986 int unsignedp, volatilep;
2988 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
2989 &unsignedp, &volatilep, false);
2991 if (toffset != 0
2992 || bitpos % BITS_PER_UNIT != 0
2993 || TREE_CODE (core) != VAR_DECL)
2995 *symbol_present = false;
2996 *var_present = true;
2997 fd_ivopts_data = data;
2998 walk_tree (&addr, find_depends, depends_on, NULL);
2999 return target_spill_cost;
3002 *offset += bitpos / BITS_PER_UNIT;
3003 if (TREE_STATIC (core)
3004 || DECL_EXTERNAL (core))
3006 *symbol_present = true;
3007 *var_present = false;
3008 return 0;
3011 *symbol_present = false;
3012 *var_present = true;
3013 return 0;
3016 /* Estimates cost of expressing difference of addresses E1 - E2 as
3017 var + symbol + offset. The value of offset is added to OFFSET,
3018 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3019 part is missing. DEPENDS_ON is a set of the invariants the computation
3020 depends on. */
3022 static unsigned
3023 ptr_difference_cost (struct ivopts_data *data,
3024 tree e1, tree e2, bool *symbol_present, bool *var_present,
3025 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3027 HOST_WIDE_INT diff = 0;
3028 unsigned cost;
3030 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3032 if (ptr_difference_const (e1, e2, &diff))
3034 *offset += diff;
3035 *symbol_present = false;
3036 *var_present = false;
3037 return 0;
3040 if (e2 == integer_zero_node)
3041 return split_address_cost (data, TREE_OPERAND (e1, 0),
3042 symbol_present, var_present, offset, depends_on);
3044 *symbol_present = false;
3045 *var_present = true;
3047 cost = force_var_cost (data, e1, depends_on);
3048 cost += force_var_cost (data, e2, depends_on);
3049 cost += add_cost (Pmode);
3051 return cost;
3054 /* Estimates cost of expressing difference E1 - E2 as
3055 var + symbol + offset. The value of offset is added to OFFSET,
3056 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3057 part is missing. DEPENDS_ON is a set of the invariants the computation
3058 depends on. */
3060 static unsigned
3061 difference_cost (struct ivopts_data *data,
3062 tree e1, tree e2, bool *symbol_present, bool *var_present,
3063 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3065 unsigned cost;
3066 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3067 unsigned HOST_WIDE_INT off1, off2;
3069 e1 = strip_offset (e1, false, &off1);
3070 e2 = strip_offset (e2, false, &off2);
3071 *offset += off1 - off2;
3073 STRIP_NOPS (e1);
3074 STRIP_NOPS (e2);
3076 if (TREE_CODE (e1) == ADDR_EXPR)
3077 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3078 depends_on);
3079 *symbol_present = false;
3081 if (operand_equal_p (e1, e2, 0))
3083 *var_present = false;
3084 return 0;
3086 *var_present = true;
3087 if (zero_p (e2))
3088 return force_var_cost (data, e1, depends_on);
3090 if (zero_p (e1))
3092 cost = force_var_cost (data, e2, depends_on);
3093 cost += multiply_by_cost (-1, mode);
3095 return cost;
3098 cost = force_var_cost (data, e1, depends_on);
3099 cost += force_var_cost (data, e2, depends_on);
3100 cost += add_cost (mode);
3102 return cost;
3105 /* Determines the cost of the computation by that USE is expressed
3106 from induction variable CAND. If ADDRESS_P is true, we just need
3107 to create an address from it, otherwise we want to get it into
3108 register. A set of invariants we depend on is stored in
3109 DEPENDS_ON. AT is the statement at that the value is computed. */
3111 static unsigned
3112 get_computation_cost_at (struct ivopts_data *data,
3113 struct iv_use *use, struct iv_cand *cand,
3114 bool address_p, bitmap *depends_on, tree at)
3116 tree ubase = use->iv->base, ustep = use->iv->step;
3117 tree cbase, cstep;
3118 tree utype = TREE_TYPE (ubase), ctype;
3119 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3120 HOST_WIDE_INT ratio, aratio;
3121 bool var_present, symbol_present;
3122 unsigned cost = 0, n_sums;
3124 *depends_on = NULL;
3126 /* Only consider real candidates. */
3127 if (!cand->iv)
3128 return INFTY;
3130 cbase = cand->iv->base;
3131 cstep = cand->iv->step;
3132 ctype = TREE_TYPE (cbase);
3134 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3136 /* We do not have a precision to express the values of use. */
3137 return INFTY;
3140 if (address_p)
3142 /* Do not try to express address of an object with computation based
3143 on address of a different object. This may cause problems in rtl
3144 level alias analysis (that does not expect this to be happening,
3145 as this is illegal in C), and would be unlikely to be useful
3146 anyway. */
3147 if (use->iv->base_object
3148 && cand->iv->base_object
3149 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3150 return INFTY;
3153 if (!cst_and_fits_in_hwi (ustep)
3154 || !cst_and_fits_in_hwi (cstep))
3155 return INFTY;
3157 if (TREE_CODE (ubase) == INTEGER_CST
3158 && !cst_and_fits_in_hwi (ubase))
3159 goto fallback;
3161 if (TREE_CODE (cbase) == INTEGER_CST
3162 && !cst_and_fits_in_hwi (cbase))
3163 goto fallback;
3165 ustepi = int_cst_value (ustep);
3166 cstepi = int_cst_value (cstep);
3168 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3170 /* TODO -- add direct handling of this case. */
3171 goto fallback;
3174 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3175 return INFTY;
3177 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3178 or ratio == 1, it is better to handle this like
3180 ubase - ratio * cbase + ratio * var
3182 (also holds in the case ratio == -1, TODO. */
3184 if (TREE_CODE (cbase) == INTEGER_CST)
3186 offset = - ratio * int_cst_value (cbase);
3187 cost += difference_cost (data,
3188 ubase, integer_zero_node,
3189 &symbol_present, &var_present, &offset,
3190 depends_on);
3192 else if (ratio == 1)
3194 cost += difference_cost (data,
3195 ubase, cbase,
3196 &symbol_present, &var_present, &offset,
3197 depends_on);
3199 else
3201 cost += force_var_cost (data, cbase, depends_on);
3202 cost += add_cost (TYPE_MODE (ctype));
3203 cost += difference_cost (data,
3204 ubase, integer_zero_node,
3205 &symbol_present, &var_present, &offset,
3206 depends_on);
3209 /* If we are after the increment, the value of the candidate is higher by
3210 one iteration. */
3211 if (stmt_after_increment (data->current_loop, cand, at))
3212 offset -= ratio * cstepi;
3214 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3215 (symbol/var/const parts may be omitted). If we are looking for an address,
3216 find the cost of addressing this. */
3217 if (address_p)
3218 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3220 /* Otherwise estimate the costs for computing the expression. */
3221 aratio = ratio > 0 ? ratio : -ratio;
3222 if (!symbol_present && !var_present && !offset)
3224 if (ratio != 1)
3225 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3227 return cost;
3230 if (aratio != 1)
3231 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3233 n_sums = 1;
3234 if (var_present
3235 /* Symbol + offset should be compile-time computable. */
3236 && (symbol_present || offset))
3237 n_sums++;
3239 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3241 fallback:
3243 /* Just get the expression, expand it and measure the cost. */
3244 tree comp = get_computation_at (data->current_loop, use, cand, at);
3246 if (!comp)
3247 return INFTY;
3249 if (address_p)
3250 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3252 return computation_cost (comp);
3256 /* Determines the cost of the computation by that USE is expressed
3257 from induction variable CAND. If ADDRESS_P is true, we just need
3258 to create an address from it, otherwise we want to get it into
3259 register. A set of invariants we depend on is stored in
3260 DEPENDS_ON. */
3262 static unsigned
3263 get_computation_cost (struct ivopts_data *data,
3264 struct iv_use *use, struct iv_cand *cand,
3265 bool address_p, bitmap *depends_on)
3267 return get_computation_cost_at (data,
3268 use, cand, address_p, depends_on, use->stmt);
3271 /* Determines cost of basing replacement of USE on CAND in a generic
3272 expression. */
3274 static bool
3275 determine_use_iv_cost_generic (struct ivopts_data *data,
3276 struct iv_use *use, struct iv_cand *cand)
3278 bitmap depends_on;
3279 unsigned cost;
3281 /* The simple case first -- if we need to express value of the preserved
3282 original biv, the cost is 0. This also prevents us from counting the
3283 cost of increment twice -- once at this use and once in the cost of
3284 the candidate. */
3285 if (cand->pos == IP_ORIGINAL
3286 && cand->incremented_at == use->stmt)
3288 set_use_iv_cost (data, use, cand, 0, NULL);
3289 return true;
3292 cost = get_computation_cost (data, use, cand, false, &depends_on);
3293 set_use_iv_cost (data, use, cand, cost, depends_on);
3295 return cost != INFTY;
3298 /* Determines cost of basing replacement of USE on CAND in an address. */
3300 static bool
3301 determine_use_iv_cost_address (struct ivopts_data *data,
3302 struct iv_use *use, struct iv_cand *cand)
3304 bitmap depends_on;
3305 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3307 set_use_iv_cost (data, use, cand, cost, depends_on);
3309 return cost != INFTY;
3312 /* Computes value of induction variable IV in iteration NITER. */
3314 static tree
3315 iv_value (struct iv *iv, tree niter)
3317 tree val;
3318 tree type = TREE_TYPE (iv->base);
3320 niter = fold_convert (type, niter);
3321 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3323 return fold (build2 (PLUS_EXPR, type, iv->base, val));
3326 /* Computes value of candidate CAND at position AT in iteration NITER. */
3328 static tree
3329 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3331 tree val = iv_value (cand->iv, niter);
3332 tree type = TREE_TYPE (cand->iv->base);
3334 if (stmt_after_increment (loop, cand, at))
3335 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3337 return val;
3340 /* Returns period of induction variable iv. */
3342 static tree
3343 iv_period (struct iv *iv)
3345 tree step = iv->step, period, type;
3346 tree pow2div;
3348 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3350 /* Period of the iv is gcd (step, type range). Since type range is power
3351 of two, it suffices to determine the maximum power of two that divides
3352 step. */
3353 pow2div = num_ending_zeros (step);
3354 type = unsigned_type_for (TREE_TYPE (step));
3356 period = build_low_bits_mask (type,
3357 (TYPE_PRECISION (type)
3358 - tree_low_cst (pow2div, 1)));
3360 return period;
3363 /* Check whether it is possible to express the condition in USE by comparison
3364 of candidate CAND. If so, store the comparison code to COMPARE and the
3365 value compared with to BOUND. */
3367 static bool
3368 may_eliminate_iv (struct ivopts_data *data,
3369 struct iv_use *use, struct iv_cand *cand,
3370 enum tree_code *compare, tree *bound)
3372 basic_block ex_bb;
3373 edge exit;
3374 struct tree_niter_desc *niter;
3375 tree nit, nit_type;
3376 tree wider_type, period, per_type;
3377 struct loop *loop = data->current_loop;
3379 /* For now works only for exits that dominate the loop latch. TODO -- extend
3380 for other conditions inside loop body. */
3381 ex_bb = bb_for_stmt (use->stmt);
3382 if (use->stmt != last_stmt (ex_bb)
3383 || TREE_CODE (use->stmt) != COND_EXPR)
3384 return false;
3385 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3386 return false;
3388 exit = EDGE_SUCC (ex_bb, 0);
3389 if (flow_bb_inside_loop_p (loop, exit->dest))
3390 exit = EDGE_SUCC (ex_bb, 1);
3391 if (flow_bb_inside_loop_p (loop, exit->dest))
3392 return false;
3394 niter = niter_for_exit (data, exit);
3395 if (!niter
3396 || !zero_p (niter->may_be_zero))
3397 return false;
3399 nit = niter->niter;
3400 nit_type = TREE_TYPE (nit);
3402 /* Determine whether we may use the variable to test whether niter iterations
3403 elapsed. This is the case iff the period of the induction variable is
3404 greater than the number of iterations. */
3405 period = iv_period (cand->iv);
3406 if (!period)
3407 return false;
3408 per_type = TREE_TYPE (period);
3410 wider_type = TREE_TYPE (period);
3411 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3412 wider_type = per_type;
3413 else
3414 wider_type = nit_type;
3416 if (!integer_nonzerop (fold (build2 (GE_EXPR, boolean_type_node,
3417 fold_convert (wider_type, period),
3418 fold_convert (wider_type, nit)))))
3419 return false;
3421 if (exit->flags & EDGE_TRUE_VALUE)
3422 *compare = EQ_EXPR;
3423 else
3424 *compare = NE_EXPR;
3426 *bound = cand_value_at (loop, cand, use->stmt, nit);
3427 return true;
3430 /* Determines cost of basing replacement of USE on CAND in a condition. */
3432 static bool
3433 determine_use_iv_cost_condition (struct ivopts_data *data,
3434 struct iv_use *use, struct iv_cand *cand)
3436 tree bound;
3437 enum tree_code compare;
3439 /* Only consider real candidates. */
3440 if (!cand->iv)
3442 set_use_iv_cost (data, use, cand, INFTY, NULL);
3443 return false;
3446 if (may_eliminate_iv (data, use, cand, &compare, &bound))
3448 bitmap depends_on = NULL;
3449 unsigned cost = force_var_cost (data, bound, &depends_on);
3451 set_use_iv_cost (data, use, cand, cost, depends_on);
3452 return cost != INFTY;
3455 /* The induction variable elimination failed; just express the original
3456 giv. If it is compared with an invariant, note that we cannot get
3457 rid of it. */
3458 if (TREE_CODE (*use->op_p) == SSA_NAME)
3459 record_invariant (data, *use->op_p, true);
3460 else
3462 record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
3463 record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
3466 return determine_use_iv_cost_generic (data, use, cand);
3469 /* Checks whether it is possible to replace the final value of USE by
3470 a direct computation. If so, the formula is stored to *VALUE. */
3472 static bool
3473 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
3474 tree *value)
3476 struct loop *loop = data->current_loop;
3477 edge exit;
3478 struct tree_niter_desc *niter;
3480 exit = single_dom_exit (loop);
3481 if (!exit)
3482 return false;
3484 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3485 bb_for_stmt (use->stmt)));
3487 niter = niter_for_single_dom_exit (data);
3488 if (!niter
3489 || !zero_p (niter->may_be_zero))
3490 return false;
3492 *value = iv_value (use->iv, niter->niter);
3494 return true;
3497 /* Determines cost of replacing final value of USE using CAND. */
3499 static bool
3500 determine_use_iv_cost_outer (struct ivopts_data *data,
3501 struct iv_use *use, struct iv_cand *cand)
3503 bitmap depends_on;
3504 unsigned cost;
3505 edge exit;
3506 tree value;
3507 struct loop *loop = data->current_loop;
3509 /* The simple case first -- if we need to express value of the preserved
3510 original biv, the cost is 0. This also prevents us from counting the
3511 cost of increment twice -- once at this use and once in the cost of
3512 the candidate. */
3513 if (cand->pos == IP_ORIGINAL
3514 && cand->incremented_at == use->stmt)
3516 set_use_iv_cost (data, use, cand, 0, NULL);
3517 return true;
3520 if (!cand->iv)
3522 if (!may_replace_final_value (data, use, &value))
3524 set_use_iv_cost (data, use, cand, INFTY, NULL);
3525 return false;
3528 depends_on = NULL;
3529 cost = force_var_cost (data, value, &depends_on);
3531 cost /= AVG_LOOP_NITER (loop);
3533 set_use_iv_cost (data, use, cand, cost, depends_on);
3534 return cost != INFTY;
3537 exit = single_dom_exit (loop);
3538 if (exit)
3540 /* If there is just a single exit, we may use value of the candidate
3541 after we take it to determine the value of use. */
3542 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3543 last_stmt (exit->src));
3544 if (cost != INFTY)
3545 cost /= AVG_LOOP_NITER (loop);
3547 else
3549 /* Otherwise we just need to compute the iv. */
3550 cost = get_computation_cost (data, use, cand, false, &depends_on);
3553 set_use_iv_cost (data, use, cand, cost, depends_on);
3555 return cost != INFTY;
3558 /* Determines cost of basing replacement of USE on CAND. Returns false
3559 if USE cannot be based on CAND. */
3561 static bool
3562 determine_use_iv_cost (struct ivopts_data *data,
3563 struct iv_use *use, struct iv_cand *cand)
3565 switch (use->type)
3567 case USE_NONLINEAR_EXPR:
3568 return determine_use_iv_cost_generic (data, use, cand);
3570 case USE_OUTER:
3571 return determine_use_iv_cost_outer (data, use, cand);
3573 case USE_ADDRESS:
3574 return determine_use_iv_cost_address (data, use, cand);
3576 case USE_COMPARE:
3577 return determine_use_iv_cost_condition (data, use, cand);
3579 default:
3580 gcc_unreachable ();
3584 /* Determines costs of basing the use of the iv on an iv candidate. */
3586 static void
3587 determine_use_iv_costs (struct ivopts_data *data)
3589 unsigned i, j;
3590 struct iv_use *use;
3591 struct iv_cand *cand;
3592 bitmap to_clear = BITMAP_ALLOC (NULL);
3594 alloc_use_cost_map (data);
3596 for (i = 0; i < n_iv_uses (data); i++)
3598 use = iv_use (data, i);
3600 if (data->consider_all_candidates)
3602 for (j = 0; j < n_iv_cands (data); j++)
3604 cand = iv_cand (data, j);
3605 determine_use_iv_cost (data, use, cand);
3608 else
3610 bitmap_iterator bi;
3612 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3614 cand = iv_cand (data, j);
3615 if (!determine_use_iv_cost (data, use, cand))
3616 bitmap_set_bit (to_clear, j);
3619 /* Remove the candidates for that the cost is infinite from
3620 the list of related candidates. */
3621 bitmap_and_compl_into (use->related_cands, to_clear);
3622 bitmap_clear (to_clear);
3626 BITMAP_FREE (to_clear);
3628 if (dump_file && (dump_flags & TDF_DETAILS))
3630 fprintf (dump_file, "Use-candidate costs:\n");
3632 for (i = 0; i < n_iv_uses (data); i++)
3634 use = iv_use (data, i);
3636 fprintf (dump_file, "Use %d:\n", i);
3637 fprintf (dump_file, " cand\tcost\tdepends on\n");
3638 for (j = 0; j < use->n_map_members; j++)
3640 if (!use->cost_map[j].cand
3641 || use->cost_map[j].cost == INFTY)
3642 continue;
3644 fprintf (dump_file, " %d\t%d\t",
3645 use->cost_map[j].cand->id,
3646 use->cost_map[j].cost);
3647 if (use->cost_map[j].depends_on)
3648 bitmap_print (dump_file,
3649 use->cost_map[j].depends_on, "","");
3650 fprintf (dump_file, "\n");
3653 fprintf (dump_file, "\n");
3655 fprintf (dump_file, "\n");
3659 /* Determines cost of the candidate CAND. */
3661 static void
3662 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3664 unsigned cost_base, cost_step;
3665 tree base;
3667 if (!cand->iv)
3669 cand->cost = 0;
3670 return;
3673 /* There are two costs associated with the candidate -- its increment
3674 and its initialization. The second is almost negligible for any loop
3675 that rolls enough, so we take it just very little into account. */
3677 base = cand->iv->base;
3678 cost_base = force_var_cost (data, base, NULL);
3679 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3681 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3683 /* Prefer the original iv unless we may gain something by replacing it. */
3684 if (cand->pos == IP_ORIGINAL)
3685 cand->cost--;
3687 /* Prefer not to insert statements into latch unless there are some
3688 already (so that we do not create unnecessary jumps). */
3689 if (cand->pos == IP_END
3690 && empty_block_p (ip_end_pos (data->current_loop)))
3691 cand->cost++;
3694 /* Determines costs of computation of the candidates. */
3696 static void
3697 determine_iv_costs (struct ivopts_data *data)
3699 unsigned i;
3701 if (dump_file && (dump_flags & TDF_DETAILS))
3703 fprintf (dump_file, "Candidate costs:\n");
3704 fprintf (dump_file, " cand\tcost\n");
3707 for (i = 0; i < n_iv_cands (data); i++)
3709 struct iv_cand *cand = iv_cand (data, i);
3711 determine_iv_cost (data, cand);
3713 if (dump_file && (dump_flags & TDF_DETAILS))
3714 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3717 if (dump_file && (dump_flags & TDF_DETAILS))
3718 fprintf (dump_file, "\n");
3721 /* Calculates cost for having SIZE induction variables. */
3723 static unsigned
3724 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3726 return global_cost_for_size (size,
3727 loop_data (data->current_loop)->regs_used,
3728 n_iv_uses (data));
3731 /* For each size of the induction variable set determine the penalty. */
3733 static void
3734 determine_set_costs (struct ivopts_data *data)
3736 unsigned j, n;
3737 tree phi, op;
3738 struct loop *loop = data->current_loop;
3739 bitmap_iterator bi;
3741 /* We use the following model (definitely improvable, especially the
3742 cost function -- TODO):
3744 We estimate the number of registers available (using MD data), name it A.
3746 We estimate the number of registers used by the loop, name it U. This
3747 number is obtained as the number of loop phi nodes (not counting virtual
3748 registers and bivs) + the number of variables from outside of the loop.
3750 We set a reserve R (free regs that are used for temporary computations,
3751 etc.). For now the reserve is a constant 3.
3753 Let I be the number of induction variables.
3755 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3756 make a lot of ivs without a reason).
3757 -- if A - R < U + I <= A, the cost is I * PRES_COST
3758 -- if U + I > A, the cost is I * PRES_COST and
3759 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3761 if (dump_file && (dump_flags & TDF_DETAILS))
3763 fprintf (dump_file, "Global costs:\n");
3764 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3765 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3766 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3767 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3770 n = 0;
3771 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3773 op = PHI_RESULT (phi);
3775 if (!is_gimple_reg (op))
3776 continue;
3778 if (get_iv (data, op))
3779 continue;
3781 n++;
3784 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3786 struct version_info *info = ver_info (data, j);
3788 if (info->inv_id && info->has_nonlin_use)
3789 n++;
3792 loop_data (loop)->regs_used = n;
3793 if (dump_file && (dump_flags & TDF_DETAILS))
3794 fprintf (dump_file, " regs_used %d\n", n);
3796 if (dump_file && (dump_flags & TDF_DETAILS))
3798 fprintf (dump_file, " cost for size:\n");
3799 fprintf (dump_file, " ivs\tcost\n");
3800 for (j = 0; j <= 2 * target_avail_regs; j++)
3801 fprintf (dump_file, " %d\t%d\n", j,
3802 ivopts_global_cost_for_size (data, j));
3803 fprintf (dump_file, "\n");
3807 /* Returns true if A is a cheaper cost pair than B. */
3809 static bool
3810 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3812 if (!a)
3813 return false;
3815 if (!b)
3816 return true;
3818 if (a->cost < b->cost)
3819 return true;
3821 if (a->cost > b->cost)
3822 return false;
3824 /* In case the costs are the same, prefer the cheaper candidate. */
3825 if (a->cand->cost < b->cand->cost)
3826 return true;
3828 return false;
3831 /* Computes the cost field of IVS structure. */
3833 static void
3834 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
3836 unsigned cost = 0;
3838 cost += ivs->cand_use_cost;
3839 cost += ivs->cand_cost;
3840 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
3842 ivs->cost = cost;
3845 /* Set USE not to be expressed by any candidate in IVS. */
3847 static void
3848 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
3849 struct iv_use *use)
3851 unsigned uid = use->id, cid, iid;
3852 bitmap deps;
3853 struct cost_pair *cp;
3854 bitmap_iterator bi;
3856 cp = ivs->cand_for_use[uid];
3857 if (!cp)
3858 return;
3859 cid = cp->cand->id;
3861 ivs->bad_uses++;
3862 ivs->cand_for_use[uid] = NULL;
3863 ivs->n_cand_uses[cid]--;
3865 if (ivs->n_cand_uses[cid] == 0)
3867 bitmap_clear_bit (ivs->cands, cid);
3868 /* Do not count the pseudocandidates. */
3869 if (cp->cand->iv)
3870 ivs->n_regs--;
3871 ivs->n_cands--;
3872 ivs->cand_cost -= cp->cand->cost;
3875 ivs->cand_use_cost -= cp->cost;
3877 deps = cp->depends_on;
3879 if (deps)
3881 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3883 ivs->n_invariant_uses[iid]--;
3884 if (ivs->n_invariant_uses[iid] == 0)
3885 ivs->n_regs--;
3889 iv_ca_recount_cost (data, ivs);
3892 /* Set cost pair for USE in set IVS to CP. */
3894 static void
3895 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
3896 struct iv_use *use, struct cost_pair *cp)
3898 unsigned uid = use->id, cid, iid;
3899 bitmap deps;
3900 bitmap_iterator bi;
3902 if (ivs->cand_for_use[uid] == cp)
3903 return;
3905 if (ivs->cand_for_use[uid])
3906 iv_ca_set_no_cp (data, ivs, use);
3908 if (cp)
3910 cid = cp->cand->id;
3912 ivs->bad_uses--;
3913 ivs->cand_for_use[uid] = cp;
3914 ivs->n_cand_uses[cid]++;
3915 if (ivs->n_cand_uses[cid] == 1)
3917 bitmap_set_bit (ivs->cands, cid);
3918 /* Do not count the pseudocandidates. */
3919 if (cp->cand->iv)
3920 ivs->n_regs++;
3921 ivs->n_cands++;
3922 ivs->cand_cost += cp->cand->cost;
3925 ivs->cand_use_cost += cp->cost;
3927 deps = cp->depends_on;
3929 if (deps)
3931 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3933 ivs->n_invariant_uses[iid]++;
3934 if (ivs->n_invariant_uses[iid] == 1)
3935 ivs->n_regs++;
3939 iv_ca_recount_cost (data, ivs);
3943 /* Extend set IVS by expressing USE by some of the candidates in it
3944 if possible. */
3946 static void
3947 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
3948 struct iv_use *use)
3950 struct cost_pair *best_cp = NULL, *cp;
3951 bitmap_iterator bi;
3952 unsigned i;
3954 gcc_assert (ivs->upto >= use->id);
3956 if (ivs->upto == use->id)
3958 ivs->upto++;
3959 ivs->bad_uses++;
3962 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
3964 cp = get_use_iv_cost (data, use, iv_cand (data, i));
3966 if (cheaper_cost_pair (cp, best_cp))
3967 best_cp = cp;
3970 iv_ca_set_cp (data, ivs, use, best_cp);
3973 /* Get cost for assignment IVS. */
3975 static unsigned
3976 iv_ca_cost (struct iv_ca *ivs)
3978 return (ivs->bad_uses ? INFTY : ivs->cost);
3981 /* Returns true if all dependences of CP are among invariants in IVS. */
3983 static bool
3984 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
3986 unsigned i;
3987 bitmap_iterator bi;
3989 if (!cp->depends_on)
3990 return true;
3992 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
3994 if (ivs->n_invariant_uses[i] == 0)
3995 return false;
3998 return true;
4001 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4002 it before NEXT_CHANGE. */
4004 static struct iv_ca_delta *
4005 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4006 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4008 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4010 change->use = use;
4011 change->old_cp = old_cp;
4012 change->new_cp = new_cp;
4013 change->next_change = next_change;
4015 return change;
4018 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4019 are rewritten. */
4021 static struct iv_ca_delta *
4022 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4024 struct iv_ca_delta *last;
4026 if (!l2)
4027 return l1;
4029 if (!l1)
4030 return l2;
4032 for (last = l1; last->next_change; last = last->next_change)
4033 continue;
4034 last->next_change = l2;
4036 return l1;
4039 /* Returns candidate by that USE is expressed in IVS. */
4041 static struct cost_pair *
4042 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4044 return ivs->cand_for_use[use->id];
4047 /* Reverse the list of changes DELTA, forming the inverse to it. */
4049 static struct iv_ca_delta *
4050 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4052 struct iv_ca_delta *act, *next, *prev = NULL;
4053 struct cost_pair *tmp;
4055 for (act = delta; act; act = next)
4057 next = act->next_change;
4058 act->next_change = prev;
4059 prev = act;
4061 tmp = act->old_cp;
4062 act->old_cp = act->new_cp;
4063 act->new_cp = tmp;
4066 return prev;
4069 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4070 reverted instead. */
4072 static void
4073 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4074 struct iv_ca_delta *delta, bool forward)
4076 struct cost_pair *from, *to;
4077 struct iv_ca_delta *act;
4079 if (!forward)
4080 delta = iv_ca_delta_reverse (delta);
4082 for (act = delta; act; act = act->next_change)
4084 from = act->old_cp;
4085 to = act->new_cp;
4086 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4087 iv_ca_set_cp (data, ivs, act->use, to);
4090 if (!forward)
4091 iv_ca_delta_reverse (delta);
4094 /* Returns true if CAND is used in IVS. */
4096 static bool
4097 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4099 return ivs->n_cand_uses[cand->id] > 0;
4102 /* Returns number of induction variable candidates in the set IVS. */
4104 static unsigned
4105 iv_ca_n_cands (struct iv_ca *ivs)
4107 return ivs->n_cands;
4110 /* Free the list of changes DELTA. */
4112 static void
4113 iv_ca_delta_free (struct iv_ca_delta **delta)
4115 struct iv_ca_delta *act, *next;
4117 for (act = *delta; act; act = next)
4119 next = act->next_change;
4120 free (act);
4123 *delta = NULL;
4126 /* Allocates new iv candidates assignment. */
4128 static struct iv_ca *
4129 iv_ca_new (struct ivopts_data *data)
4131 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4133 nw->upto = 0;
4134 nw->bad_uses = 0;
4135 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4136 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4137 nw->cands = BITMAP_ALLOC (NULL);
4138 nw->n_cands = 0;
4139 nw->n_regs = 0;
4140 nw->cand_use_cost = 0;
4141 nw->cand_cost = 0;
4142 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4143 nw->cost = 0;
4145 return nw;
4148 /* Free memory occupied by the set IVS. */
4150 static void
4151 iv_ca_free (struct iv_ca **ivs)
4153 free ((*ivs)->cand_for_use);
4154 free ((*ivs)->n_cand_uses);
4155 BITMAP_FREE ((*ivs)->cands);
4156 free ((*ivs)->n_invariant_uses);
4157 free (*ivs);
4158 *ivs = NULL;
4161 /* Dumps IVS to FILE. */
4163 static void
4164 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4166 const char *pref = " invariants ";
4167 unsigned i;
4169 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4170 bitmap_print (file, ivs->cands, " candidates ","\n");
4172 for (i = 1; i <= data->max_inv_id; i++)
4173 if (ivs->n_invariant_uses[i])
4175 fprintf (file, "%s%d", pref, i);
4176 pref = ", ";
4178 fprintf (file, "\n");
4181 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4182 new set, and store differences in DELTA. Number of induction variables
4183 in the new set is stored to N_IVS. */
4185 static unsigned
4186 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4187 struct iv_cand *cand, struct iv_ca_delta **delta,
4188 unsigned *n_ivs)
4190 unsigned i, cost;
4191 struct iv_use *use;
4192 struct cost_pair *old_cp, *new_cp;
4194 *delta = NULL;
4195 for (i = 0; i < ivs->upto; i++)
4197 use = iv_use (data, i);
4198 old_cp = iv_ca_cand_for_use (ivs, use);
4200 if (old_cp
4201 && old_cp->cand == cand)
4202 continue;
4204 new_cp = get_use_iv_cost (data, use, cand);
4205 if (!new_cp)
4206 continue;
4208 if (!iv_ca_has_deps (ivs, new_cp))
4209 continue;
4211 if (!cheaper_cost_pair (new_cp, old_cp))
4212 continue;
4214 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4217 iv_ca_delta_commit (data, ivs, *delta, true);
4218 cost = iv_ca_cost (ivs);
4219 if (n_ivs)
4220 *n_ivs = iv_ca_n_cands (ivs);
4221 iv_ca_delta_commit (data, ivs, *delta, false);
4223 return cost;
4226 /* Try narrowing set IVS by removing CAND. Return the cost of
4227 the new set and store the differences in DELTA. */
4229 static unsigned
4230 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4231 struct iv_cand *cand, struct iv_ca_delta **delta)
4233 unsigned i, ci;
4234 struct iv_use *use;
4235 struct cost_pair *old_cp, *new_cp, *cp;
4236 bitmap_iterator bi;
4237 struct iv_cand *cnd;
4238 unsigned cost;
4240 *delta = NULL;
4241 for (i = 0; i < n_iv_uses (data); i++)
4243 use = iv_use (data, i);
4245 old_cp = iv_ca_cand_for_use (ivs, use);
4246 if (old_cp->cand != cand)
4247 continue;
4249 new_cp = NULL;
4251 if (data->consider_all_candidates)
4253 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4255 if (ci == cand->id)
4256 continue;
4258 cnd = iv_cand (data, ci);
4260 cp = get_use_iv_cost (data, use, cnd);
4261 if (!cp)
4262 continue;
4263 if (!iv_ca_has_deps (ivs, cp))
4264 continue;
4266 if (!cheaper_cost_pair (cp, new_cp))
4267 continue;
4269 new_cp = cp;
4272 else
4274 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4276 if (ci == cand->id)
4277 continue;
4279 cnd = iv_cand (data, ci);
4281 cp = get_use_iv_cost (data, use, cnd);
4282 if (!cp)
4283 continue;
4284 if (!iv_ca_has_deps (ivs, cp))
4285 continue;
4287 if (!cheaper_cost_pair (cp, new_cp))
4288 continue;
4290 new_cp = cp;
4294 if (!new_cp)
4296 iv_ca_delta_free (delta);
4297 return INFTY;
4300 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4303 iv_ca_delta_commit (data, ivs, *delta, true);
4304 cost = iv_ca_cost (ivs);
4305 iv_ca_delta_commit (data, ivs, *delta, false);
4307 return cost;
4310 /* Try optimizing the set of candidates IVS by removing candidates different
4311 from to EXCEPT_CAND from it. Return cost of the new set, and store
4312 differences in DELTA. */
4314 static unsigned
4315 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4316 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4318 bitmap_iterator bi;
4319 struct iv_ca_delta *act_delta, *best_delta;
4320 unsigned i, best_cost, acost;
4321 struct iv_cand *cand;
4323 best_delta = NULL;
4324 best_cost = iv_ca_cost (ivs);
4326 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4328 cand = iv_cand (data, i);
4330 if (cand == except_cand)
4331 continue;
4333 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4335 if (acost < best_cost)
4337 best_cost = acost;
4338 iv_ca_delta_free (&best_delta);
4339 best_delta = act_delta;
4341 else
4342 iv_ca_delta_free (&act_delta);
4345 if (!best_delta)
4347 *delta = NULL;
4348 return best_cost;
4351 /* Recurse to possibly remove other unnecessary ivs. */
4352 iv_ca_delta_commit (data, ivs, best_delta, true);
4353 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4354 iv_ca_delta_commit (data, ivs, best_delta, false);
4355 *delta = iv_ca_delta_join (best_delta, *delta);
4356 return best_cost;
4359 /* Tries to extend the sets IVS in the best possible way in order
4360 to express the USE. */
4362 static bool
4363 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4364 struct iv_use *use)
4366 unsigned best_cost, act_cost;
4367 unsigned i;
4368 bitmap_iterator bi;
4369 struct iv_cand *cand;
4370 struct iv_ca_delta *best_delta = NULL, *act_delta;
4371 struct cost_pair *cp;
4373 iv_ca_add_use (data, ivs, use);
4374 best_cost = iv_ca_cost (ivs);
4376 cp = iv_ca_cand_for_use (ivs, use);
4377 if (cp)
4379 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4380 iv_ca_set_no_cp (data, ivs, use);
4383 /* First try important candidates. Only if it fails, try the specific ones.
4384 Rationale -- in loops with many variables the best choice often is to use
4385 just one generic biv. If we added here many ivs specific to the uses,
4386 the optimization algorithm later would be likely to get stuck in a local
4387 minimum, thus causing us to create too many ivs. The approach from
4388 few ivs to more seems more likely to be successful -- starting from few
4389 ivs, replacing an expensive use by a specific iv should always be a
4390 win. */
4391 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4393 cand = iv_cand (data, i);
4395 if (iv_ca_cand_used_p (ivs, cand))
4396 continue;
4398 cp = get_use_iv_cost (data, use, cand);
4399 if (!cp)
4400 continue;
4402 iv_ca_set_cp (data, ivs, use, cp);
4403 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4404 iv_ca_set_no_cp (data, ivs, use);
4405 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4407 if (act_cost < best_cost)
4409 best_cost = act_cost;
4411 iv_ca_delta_free (&best_delta);
4412 best_delta = act_delta;
4414 else
4415 iv_ca_delta_free (&act_delta);
4418 if (best_cost == INFTY)
4420 for (i = 0; i < use->n_map_members; i++)
4422 cp = use->cost_map + i;
4423 cand = cp->cand;
4424 if (!cand)
4425 continue;
4427 /* Already tried this. */
4428 if (cand->important)
4429 continue;
4431 if (iv_ca_cand_used_p (ivs, cand))
4432 continue;
4434 act_delta = NULL;
4435 iv_ca_set_cp (data, ivs, use, cp);
4436 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4437 iv_ca_set_no_cp (data, ivs, use);
4438 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4439 cp, act_delta);
4441 if (act_cost < best_cost)
4443 best_cost = act_cost;
4445 if (best_delta)
4446 iv_ca_delta_free (&best_delta);
4447 best_delta = act_delta;
4449 else
4450 iv_ca_delta_free (&act_delta);
4454 iv_ca_delta_commit (data, ivs, best_delta, true);
4455 iv_ca_delta_free (&best_delta);
4457 return (best_cost != INFTY);
4460 /* Finds an initial assignment of candidates to uses. */
4462 static struct iv_ca *
4463 get_initial_solution (struct ivopts_data *data)
4465 struct iv_ca *ivs = iv_ca_new (data);
4466 unsigned i;
4468 for (i = 0; i < n_iv_uses (data); i++)
4469 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4471 iv_ca_free (&ivs);
4472 return NULL;
4475 return ivs;
4478 /* Tries to improve set of induction variables IVS. */
4480 static bool
4481 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4483 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4484 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4485 struct iv_cand *cand;
4487 /* Try extending the set of induction variables by one. */
4488 for (i = 0; i < n_iv_cands (data); i++)
4490 cand = iv_cand (data, i);
4492 if (iv_ca_cand_used_p (ivs, cand))
4493 continue;
4495 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4496 if (!act_delta)
4497 continue;
4499 /* If we successfully added the candidate and the set is small enough,
4500 try optimizing it by removing other candidates. */
4501 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4503 iv_ca_delta_commit (data, ivs, act_delta, true);
4504 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4505 iv_ca_delta_commit (data, ivs, act_delta, false);
4506 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4509 if (acost < best_cost)
4511 best_cost = acost;
4512 iv_ca_delta_free (&best_delta);
4513 best_delta = act_delta;
4515 else
4516 iv_ca_delta_free (&act_delta);
4519 if (!best_delta)
4521 /* Try removing the candidates from the set instead. */
4522 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4524 /* Nothing more we can do. */
4525 if (!best_delta)
4526 return false;
4529 iv_ca_delta_commit (data, ivs, best_delta, true);
4530 gcc_assert (best_cost == iv_ca_cost (ivs));
4531 iv_ca_delta_free (&best_delta);
4532 return true;
4535 /* Attempts to find the optimal set of induction variables. We do simple
4536 greedy heuristic -- we try to replace at most one candidate in the selected
4537 solution and remove the unused ivs while this improves the cost. */
4539 static struct iv_ca *
4540 find_optimal_iv_set (struct ivopts_data *data)
4542 unsigned i;
4543 struct iv_ca *set;
4544 struct iv_use *use;
4546 /* Get the initial solution. */
4547 set = get_initial_solution (data);
4548 if (!set)
4550 if (dump_file && (dump_flags & TDF_DETAILS))
4551 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4552 return NULL;
4555 if (dump_file && (dump_flags & TDF_DETAILS))
4557 fprintf (dump_file, "Initial set of candidates:\n");
4558 iv_ca_dump (data, dump_file, set);
4561 while (try_improve_iv_set (data, set))
4563 if (dump_file && (dump_flags & TDF_DETAILS))
4565 fprintf (dump_file, "Improved to:\n");
4566 iv_ca_dump (data, dump_file, set);
4570 if (dump_file && (dump_flags & TDF_DETAILS))
4571 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
4573 for (i = 0; i < n_iv_uses (data); i++)
4575 use = iv_use (data, i);
4576 use->selected = iv_ca_cand_for_use (set, use)->cand;
4579 return set;
4582 /* Creates a new induction variable corresponding to CAND. */
4584 static void
4585 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4587 block_stmt_iterator incr_pos;
4588 tree base;
4589 bool after = false;
4591 if (!cand->iv)
4592 return;
4594 switch (cand->pos)
4596 case IP_NORMAL:
4597 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4598 break;
4600 case IP_END:
4601 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4602 after = true;
4603 break;
4605 case IP_ORIGINAL:
4606 /* Mark that the iv is preserved. */
4607 name_info (data, cand->var_before)->preserve_biv = true;
4608 name_info (data, cand->var_after)->preserve_biv = true;
4610 /* Rewrite the increment so that it uses var_before directly. */
4611 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4613 return;
4616 gimple_add_tmp_var (cand->var_before);
4617 add_referenced_tmp_var (cand->var_before);
4619 base = unshare_expr (cand->iv->base);
4621 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
4622 &incr_pos, after, &cand->var_before, &cand->var_after);
4625 /* Creates new induction variables described in SET. */
4627 static void
4628 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4630 unsigned i;
4631 struct iv_cand *cand;
4632 bitmap_iterator bi;
4634 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4636 cand = iv_cand (data, i);
4637 create_new_iv (data, cand);
4641 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4642 is true, remove also the ssa name defined by the statement. */
4644 static void
4645 remove_statement (tree stmt, bool including_defined_name)
4647 if (TREE_CODE (stmt) == PHI_NODE)
4649 if (!including_defined_name)
4651 /* Prevent the ssa name defined by the statement from being removed. */
4652 SET_PHI_RESULT (stmt, NULL);
4654 remove_phi_node (stmt, NULL_TREE, bb_for_stmt (stmt));
4656 else
4658 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4660 bsi_remove (&bsi);
4664 /* Rewrites USE (definition of iv used in a nonlinear expression)
4665 using candidate CAND. */
4667 static void
4668 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4669 struct iv_use *use, struct iv_cand *cand)
4671 tree comp;
4672 tree op, stmts, tgt, ass;
4673 block_stmt_iterator bsi, pbsi;
4675 /* An important special case -- if we are asked to express value of
4676 the original iv by itself, just exit; there is no need to
4677 introduce a new computation (that might also need casting the
4678 variable to unsigned and back). */
4679 if (cand->pos == IP_ORIGINAL
4680 && TREE_CODE (use->stmt) == MODIFY_EXPR
4681 && TREE_OPERAND (use->stmt, 0) == cand->var_after)
4683 op = TREE_OPERAND (use->stmt, 1);
4685 /* Be a bit careful. In case variable is expressed in some
4686 complicated way, rewrite it so that we may get rid of this
4687 complicated expression. */
4688 if ((TREE_CODE (op) == PLUS_EXPR
4689 || TREE_CODE (op) == MINUS_EXPR)
4690 && TREE_OPERAND (op, 0) == cand->var_before
4691 && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
4692 return;
4695 comp = unshare_expr (get_computation (data->current_loop,
4696 use, cand));
4697 switch (TREE_CODE (use->stmt))
4699 case PHI_NODE:
4700 tgt = PHI_RESULT (use->stmt);
4702 /* If we should keep the biv, do not replace it. */
4703 if (name_info (data, tgt)->preserve_biv)
4704 return;
4706 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4707 while (!bsi_end_p (pbsi)
4708 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4710 bsi = pbsi;
4711 bsi_next (&pbsi);
4713 break;
4715 case MODIFY_EXPR:
4716 tgt = TREE_OPERAND (use->stmt, 0);
4717 bsi = bsi_for_stmt (use->stmt);
4718 break;
4720 default:
4721 gcc_unreachable ();
4724 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4726 if (TREE_CODE (use->stmt) == PHI_NODE)
4728 if (stmts)
4729 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4730 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
4731 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4732 remove_statement (use->stmt, false);
4733 SSA_NAME_DEF_STMT (tgt) = ass;
4735 else
4737 if (stmts)
4738 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4739 TREE_OPERAND (use->stmt, 1) = op;
4743 /* Replaces ssa name in index IDX by its basic variable. Callback for
4744 for_each_index. */
4746 static bool
4747 idx_remove_ssa_names (tree base, tree *idx,
4748 void *data ATTRIBUTE_UNUSED)
4750 tree *op;
4752 if (TREE_CODE (*idx) == SSA_NAME)
4753 *idx = SSA_NAME_VAR (*idx);
4755 if (TREE_CODE (base) == ARRAY_REF)
4757 op = &TREE_OPERAND (base, 2);
4758 if (*op
4759 && TREE_CODE (*op) == SSA_NAME)
4760 *op = SSA_NAME_VAR (*op);
4761 op = &TREE_OPERAND (base, 3);
4762 if (*op
4763 && TREE_CODE (*op) == SSA_NAME)
4764 *op = SSA_NAME_VAR (*op);
4767 return true;
4770 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4772 static tree
4773 unshare_and_remove_ssa_names (tree ref)
4775 ref = unshare_expr (ref);
4776 for_each_index (&ref, idx_remove_ssa_names, NULL);
4778 return ref;
4781 /* Rewrites base of memory access OP with expression WITH in statement
4782 pointed to by BSI. */
4784 static void
4785 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
4787 tree bvar, var, new_var, new_name, copy, name;
4788 tree orig;
4790 var = bvar = get_base_address (*op);
4792 if (!var || TREE_CODE (with) != SSA_NAME)
4793 goto do_rewrite;
4795 gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
4796 gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
4797 if (TREE_CODE (var) == INDIRECT_REF)
4798 var = TREE_OPERAND (var, 0);
4799 if (TREE_CODE (var) == SSA_NAME)
4801 name = var;
4802 var = SSA_NAME_VAR (var);
4804 else if (DECL_P (var))
4805 name = NULL_TREE;
4806 else
4807 goto do_rewrite;
4809 if (var_ann (var)->type_mem_tag)
4810 var = var_ann (var)->type_mem_tag;
4812 /* We need to add a memory tag for the variable. But we do not want
4813 to add it to the temporary used for the computations, since this leads
4814 to problems in redundancy elimination when there are common parts
4815 in two computations referring to the different arrays. So we copy
4816 the variable to a new temporary. */
4817 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
4818 if (name)
4819 new_name = duplicate_ssa_name (name, copy);
4820 else
4822 new_var = create_tmp_var (TREE_TYPE (with), "ruatmp");
4823 add_referenced_tmp_var (new_var);
4824 var_ann (new_var)->type_mem_tag = var;
4825 new_name = make_ssa_name (new_var, copy);
4827 TREE_OPERAND (copy, 0) = new_name;
4828 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
4829 with = new_name;
4831 do_rewrite:
4833 orig = NULL_TREE;
4834 gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
4835 gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
4837 if (TREE_CODE (*op) == INDIRECT_REF)
4838 orig = REF_ORIGINAL (*op);
4839 if (!orig)
4840 orig = unshare_and_remove_ssa_names (*op);
4842 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
4844 /* Record the original reference, for purposes of alias analysis. */
4845 REF_ORIGINAL (*op) = orig;
4848 /* Rewrites USE (address that is an iv) using candidate CAND. */
4850 static void
4851 rewrite_use_address (struct ivopts_data *data,
4852 struct iv_use *use, struct iv_cand *cand)
4854 tree comp = unshare_expr (get_computation (data->current_loop,
4855 use, cand));
4856 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4857 tree stmts;
4858 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
4860 if (stmts)
4861 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4863 rewrite_address_base (&bsi, use->op_p, op);
4866 /* Rewrites USE (the condition such that one of the arguments is an iv) using
4867 candidate CAND. */
4869 static void
4870 rewrite_use_compare (struct ivopts_data *data,
4871 struct iv_use *use, struct iv_cand *cand)
4873 tree comp;
4874 tree *op_p, cond, op, stmts, bound;
4875 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4876 enum tree_code compare;
4878 if (may_eliminate_iv (data, use, cand, &compare, &bound))
4880 op = force_gimple_operand (unshare_expr (bound), &stmts,
4881 true, NULL_TREE);
4883 if (stmts)
4884 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4886 *use->op_p = build2 (compare, boolean_type_node,
4887 var_at_stmt (data->current_loop,
4888 cand, use->stmt), op);
4889 modify_stmt (use->stmt);
4890 return;
4893 /* The induction variable elimination failed; just express the original
4894 giv. */
4895 comp = unshare_expr (get_computation (data->current_loop, use, cand));
4897 cond = *use->op_p;
4898 op_p = &TREE_OPERAND (cond, 0);
4899 if (TREE_CODE (*op_p) != SSA_NAME
4900 || zero_p (get_iv (data, *op_p)->step))
4901 op_p = &TREE_OPERAND (cond, 1);
4903 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
4904 if (stmts)
4905 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4907 *op_p = op;
4910 /* Ensure that operand *OP_P may be used at the end of EXIT without
4911 violating loop closed ssa form. */
4913 static void
4914 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
4916 basic_block def_bb;
4917 struct loop *def_loop;
4918 tree phi, use;
4920 use = USE_FROM_PTR (op_p);
4921 if (TREE_CODE (use) != SSA_NAME)
4922 return;
4924 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
4925 if (!def_bb)
4926 return;
4928 def_loop = def_bb->loop_father;
4929 if (flow_bb_inside_loop_p (def_loop, exit->dest))
4930 return;
4932 /* Try finding a phi node that copies the value out of the loop. */
4933 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
4934 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
4935 break;
4937 if (!phi)
4939 /* Create such a phi node. */
4940 tree new_name = duplicate_ssa_name (use, NULL);
4942 phi = create_phi_node (new_name, exit->dest);
4943 SSA_NAME_DEF_STMT (new_name) = phi;
4944 add_phi_arg (phi, use, exit);
4947 SET_USE (op_p, PHI_RESULT (phi));
4950 /* Ensure that operands of STMT may be used at the end of EXIT without
4951 violating loop closed ssa form. */
4953 static void
4954 protect_loop_closed_ssa_form (edge exit, tree stmt)
4956 use_optype uses;
4957 vuse_optype vuses;
4958 v_may_def_optype v_may_defs;
4959 unsigned i;
4961 get_stmt_operands (stmt);
4963 uses = STMT_USE_OPS (stmt);
4964 for (i = 0; i < NUM_USES (uses); i++)
4965 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4967 vuses = STMT_VUSE_OPS (stmt);
4968 for (i = 0; i < NUM_VUSES (vuses); i++)
4969 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
4971 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4972 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
4973 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
4976 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
4977 so that they are emitted on the correct place, and so that the loop closed
4978 ssa form is preserved. */
4980 static void
4981 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
4983 tree_stmt_iterator tsi;
4984 block_stmt_iterator bsi;
4985 tree phi, stmt, def, next;
4987 if (EDGE_COUNT (exit->dest->preds) > 1)
4988 split_loop_exit_edge (exit);
4990 if (TREE_CODE (stmts) == STATEMENT_LIST)
4992 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
4993 protect_loop_closed_ssa_form (exit, tsi_stmt (tsi));
4995 else
4996 protect_loop_closed_ssa_form (exit, stmts);
4998 /* Ensure there is label in exit->dest, so that we can
4999 insert after it. */
5000 tree_block_label (exit->dest);
5001 bsi = bsi_after_labels (exit->dest);
5002 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5004 if (!op)
5005 return;
5007 for (phi = phi_nodes (exit->dest); phi; phi = next)
5009 next = PHI_CHAIN (phi);
5011 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5013 def = PHI_RESULT (phi);
5014 remove_statement (phi, false);
5015 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5016 def, op);
5017 SSA_NAME_DEF_STMT (def) = stmt;
5018 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5023 /* Rewrites the final value of USE (that is only needed outside of the loop)
5024 using candidate CAND. */
5026 static void
5027 rewrite_use_outer (struct ivopts_data *data,
5028 struct iv_use *use, struct iv_cand *cand)
5030 edge exit;
5031 tree value, op, stmts, tgt;
5032 tree phi;
5034 switch (TREE_CODE (use->stmt))
5036 case PHI_NODE:
5037 tgt = PHI_RESULT (use->stmt);
5038 break;
5039 case MODIFY_EXPR:
5040 tgt = TREE_OPERAND (use->stmt, 0);
5041 break;
5042 default:
5043 gcc_unreachable ();
5046 exit = single_dom_exit (data->current_loop);
5048 if (exit)
5050 if (!cand->iv)
5052 bool ok = may_replace_final_value (data, use, &value);
5053 gcc_assert (ok);
5055 else
5056 value = get_computation_at (data->current_loop,
5057 use, cand, last_stmt (exit->src));
5059 value = unshare_expr (value);
5060 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5062 /* If we will preserve the iv anyway and we would need to perform
5063 some computation to replace the final value, do nothing. */
5064 if (stmts && name_info (data, tgt)->preserve_biv)
5065 return;
5067 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5069 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5071 if (USE_FROM_PTR (use_p) == tgt)
5072 SET_USE (use_p, op);
5075 if (stmts)
5076 compute_phi_arg_on_exit (exit, stmts, op);
5078 /* Enable removal of the statement. We cannot remove it directly,
5079 since we may still need the aliasing information attached to the
5080 ssa name defined by it. */
5081 name_info (data, tgt)->iv->have_use_for = false;
5082 return;
5085 /* If the variable is going to be preserved anyway, there is nothing to
5086 do. */
5087 if (name_info (data, tgt)->preserve_biv)
5088 return;
5090 /* Otherwise we just need to compute the iv. */
5091 rewrite_use_nonlinear_expr (data, use, cand);
5094 /* Rewrites USE using candidate CAND. */
5096 static void
5097 rewrite_use (struct ivopts_data *data,
5098 struct iv_use *use, struct iv_cand *cand)
5100 switch (use->type)
5102 case USE_NONLINEAR_EXPR:
5103 rewrite_use_nonlinear_expr (data, use, cand);
5104 break;
5106 case USE_OUTER:
5107 rewrite_use_outer (data, use, cand);
5108 break;
5110 case USE_ADDRESS:
5111 rewrite_use_address (data, use, cand);
5112 break;
5114 case USE_COMPARE:
5115 rewrite_use_compare (data, use, cand);
5116 break;
5118 default:
5119 gcc_unreachable ();
5121 modify_stmt (use->stmt);
5124 /* Rewrite the uses using the selected induction variables. */
5126 static void
5127 rewrite_uses (struct ivopts_data *data)
5129 unsigned i;
5130 struct iv_cand *cand;
5131 struct iv_use *use;
5133 for (i = 0; i < n_iv_uses (data); i++)
5135 use = iv_use (data, i);
5136 cand = use->selected;
5137 gcc_assert (cand);
5139 rewrite_use (data, use, cand);
5143 /* Removes the ivs that are not used after rewriting. */
5145 static void
5146 remove_unused_ivs (struct ivopts_data *data)
5148 unsigned j;
5149 bitmap_iterator bi;
5151 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5153 struct version_info *info;
5155 info = ver_info (data, j);
5156 if (info->iv
5157 && !zero_p (info->iv->step)
5158 && !info->inv_id
5159 && !info->iv->have_use_for
5160 && !info->preserve_biv)
5161 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5165 /* Frees data allocated by the optimization of a single loop. */
5167 static void
5168 free_loop_data (struct ivopts_data *data)
5170 unsigned i, j;
5171 bitmap_iterator bi;
5173 htab_empty (data->niters);
5175 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5177 struct version_info *info;
5179 info = ver_info (data, i);
5180 if (info->iv)
5181 free (info->iv);
5182 info->iv = NULL;
5183 info->has_nonlin_use = false;
5184 info->preserve_biv = false;
5185 info->inv_id = 0;
5187 bitmap_clear (data->relevant);
5188 bitmap_clear (data->important_candidates);
5190 for (i = 0; i < n_iv_uses (data); i++)
5192 struct iv_use *use = iv_use (data, i);
5194 free (use->iv);
5195 BITMAP_FREE (use->related_cands);
5196 for (j = 0; j < use->n_map_members; j++)
5197 if (use->cost_map[j].depends_on)
5198 BITMAP_FREE (use->cost_map[j].depends_on);
5199 free (use->cost_map);
5200 free (use);
5202 VARRAY_POP_ALL (data->iv_uses);
5204 for (i = 0; i < n_iv_cands (data); i++)
5206 struct iv_cand *cand = iv_cand (data, i);
5208 if (cand->iv)
5209 free (cand->iv);
5210 free (cand);
5212 VARRAY_POP_ALL (data->iv_candidates);
5214 if (data->version_info_size < num_ssa_names)
5216 data->version_info_size = 2 * num_ssa_names;
5217 free (data->version_info);
5218 data->version_info = xcalloc (data->version_info_size,
5219 sizeof (struct version_info));
5222 data->max_inv_id = 0;
5224 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
5226 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
5228 SET_DECL_RTL (obj, NULL_RTX);
5230 VARRAY_POP_ALL (decl_rtl_to_reset);
5233 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5234 loop tree. */
5236 static void
5237 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5239 unsigned i;
5241 for (i = 1; i < loops->num; i++)
5242 if (loops->parray[i])
5244 free (loops->parray[i]->aux);
5245 loops->parray[i]->aux = NULL;
5248 free_loop_data (data);
5249 free (data->version_info);
5250 BITMAP_FREE (data->relevant);
5251 BITMAP_FREE (data->important_candidates);
5252 htab_delete (data->niters);
5254 VARRAY_FREE (decl_rtl_to_reset);
5255 VARRAY_FREE (data->iv_uses);
5256 VARRAY_FREE (data->iv_candidates);
5259 /* Optimizes the LOOP. Returns true if anything changed. */
5261 static bool
5262 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5264 bool changed = false;
5265 struct iv_ca *iv_ca;
5266 edge exit;
5268 data->current_loop = loop;
5270 if (dump_file && (dump_flags & TDF_DETAILS))
5272 fprintf (dump_file, "Processing loop %d\n", loop->num);
5274 exit = single_dom_exit (loop);
5275 if (exit)
5277 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5278 exit->src->index, exit->dest->index);
5279 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5280 fprintf (dump_file, "\n");
5283 fprintf (dump_file, "\n");
5286 /* For each ssa name determines whether it behaves as an induction variable
5287 in some loop. */
5288 if (!find_induction_variables (data))
5289 goto finish;
5291 /* Finds interesting uses (item 1). */
5292 find_interesting_uses (data);
5293 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5294 goto finish;
5296 /* Finds candidates for the induction variables (item 2). */
5297 find_iv_candidates (data);
5299 /* Calculates the costs (item 3, part 1). */
5300 determine_use_iv_costs (data);
5301 determine_iv_costs (data);
5302 determine_set_costs (data);
5304 /* Find the optimal set of induction variables (item 3, part 2). */
5305 iv_ca = find_optimal_iv_set (data);
5306 if (!iv_ca)
5307 goto finish;
5308 changed = true;
5310 /* Create the new induction variables (item 4, part 1). */
5311 create_new_ivs (data, iv_ca);
5312 iv_ca_free (&iv_ca);
5314 /* Rewrite the uses (item 4, part 2). */
5315 rewrite_uses (data);
5317 /* Remove the ivs that are unused after rewriting. */
5318 remove_unused_ivs (data);
5320 /* We have changed the structure of induction variables; it might happen
5321 that definitions in the scev database refer to some of them that were
5322 eliminated. */
5323 scev_reset ();
5325 finish:
5326 free_loop_data (data);
5328 return changed;
5331 /* Main entry point. Optimizes induction variables in LOOPS. */
5333 void
5334 tree_ssa_iv_optimize (struct loops *loops)
5336 struct loop *loop;
5337 struct ivopts_data data;
5339 tree_ssa_iv_optimize_init (loops, &data);
5341 /* Optimize the loops starting with the innermost ones. */
5342 loop = loops->tree_root;
5343 while (loop->inner)
5344 loop = loop->inner;
5346 #ifdef ENABLE_CHECKING
5347 verify_loop_closed_ssa ();
5348 verify_stmts ();
5349 #endif
5351 /* Scan the loops, inner ones first. */
5352 while (loop != loops->tree_root)
5354 if (dump_file && (dump_flags & TDF_DETAILS))
5355 flow_loop_dump (loop, dump_file, NULL, 1);
5357 tree_ssa_iv_optimize_loop (&data, loop);
5359 if (loop->next)
5361 loop = loop->next;
5362 while (loop->inner)
5363 loop = loop->inner;
5365 else
5366 loop = loop->outer;
5369 #ifdef ENABLE_CHECKING
5370 verify_loop_closed_ssa ();
5371 verify_stmts ();
5372 #endif
5374 tree_ssa_iv_optimize_finalize (loops, &data);