official-gcc.git: gcc/tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r.t. a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variable's value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible; it might give better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
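/* An illustrative sketch (added for exposition, with hypothetical names; not
   part of the original sources): a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   yields in step 1 an address use for a[i] and a compare use for i < n;
   step 2 adds candidates such as the original counter i and a pointer iv
   starting at &a[0] with step sizeof (a[0]); steps 3 and 4 may then rewrite
   the loop roughly as

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   if the cost model finds the pointer candidate cheaper on the target.  */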
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
91 #include "langhooks.h"
93 /* The infinite cost. */
94 #define INFTY 10000000
96 /* The expected number of loop iterations. TODO -- use profiling instead of
97 this. */
98 #define AVG_LOOP_NITER(LOOP) 5
101 /* Representation of the induction variable. */
102 struct iv
104 tree base; /* Initial value of the iv. */
105 tree base_object; /* A memory object to which the induction variable points. */
106 tree step; /* Step of the iv (constant only). */
107 tree ssa_name; /* The ssa name with the value. */
108 bool biv_p; /* Is it a biv? */
109 bool have_use_for; /* Do we already have a use for it? */
110 unsigned use_id; /* The identifier in the use if it is the case. */
113 /* Per-ssa version information (induction variable descriptions, etc.). */
114 struct version_info
116 tree name; /* The ssa name. */
117 struct iv *iv; /* Induction variable description. */
118 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id; /* Id of an invariant. */
121 bool preserve_biv; /* For the original biv, whether to preserve it. */
124 /* Information attached to loop. */
125 struct loop_data
127 unsigned regs_used; /* Number of registers used. */
130 /* Types of uses. */
131 enum use_type
133 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
134 USE_OUTER, /* The induction variable is used outside the loop. */
135 USE_ADDRESS, /* Use in an address. */
136 USE_COMPARE /* Use is a compare. */
139 /* The candidate - cost pair. */
140 struct cost_pair
142 struct iv_cand *cand; /* The candidate. */
143 unsigned cost; /* The cost. */
144 bitmap depends_on; /* The list of invariants that have to be
145 preserved. */
146 tree value; /* For final value elimination, the expression for
147 the final value of the iv. For iv elimination,
148 the new bound to compare with. */
151 /* Use. */
152 struct iv_use
154 unsigned id; /* The id of the use. */
155 enum use_type type; /* Type of the use. */
156 struct iv *iv; /* The induction variable it is based on. */
157 tree stmt; /* Statement in which it occurs. */
158 tree *op_p; /* The place where it occurs. */
159 bitmap related_cands; /* The set of "related" iv candidates, plus the common
160 important ones. */
162 unsigned n_map_members; /* Number of candidates in the cost_map list. */
163 struct cost_pair *cost_map;
164 /* The costs w.r.t. the iv candidates. */
166 struct iv_cand *selected;
167 /* The selected candidate. */
170 /* The position where the iv is computed. */
171 enum iv_position
173 IP_NORMAL, /* At the end, just before the exit condition. */
174 IP_END, /* At the end of the latch block. */
175 IP_ORIGINAL /* The original biv. */
178 /* The induction variable candidate. */
179 struct iv_cand
181 unsigned id; /* The number of the candidate. */
182 bool important; /* Whether this is an "important" candidate, i.e. such
183 that it should be considered by all uses. */
184 enum iv_position pos; /* Where it is computed. */
185 tree incremented_at; /* For original biv, the statement where it is
186 incremented. */
187 tree var_before; /* The variable used for it before increment. */
188 tree var_after; /* The variable used for it after increment. */
189 struct iv *iv; /* The value of the candidate. NULL for
190 "pseudocandidate" used to indicate the possibility
191 to replace the final value of an iv by direct
192 computation of the value. */
193 unsigned cost; /* Cost of the candidate. */
196 /* The data used by the induction variable optimizations. */
198 typedef struct iv_use *iv_use_p;
199 DEF_VEC_P(iv_use_p);
200 DEF_VEC_ALLOC_P(iv_use_p,heap);
202 typedef struct iv_cand *iv_cand_p;
203 DEF_VEC_P(iv_cand_p);
204 DEF_VEC_ALLOC_P(iv_cand_p,heap);
206 struct ivopts_data
208 /* The currently optimized loop. */
209 struct loop *current_loop;
211 /* Numbers of iterations for all exits of the current loop. */
212 htab_t niters;
214 /* The size of version_info array allocated. */
215 unsigned version_info_size;
217 /* The array of information for the ssa names. */
218 struct version_info *version_info;
220 /* The bitmap of indices in version_info whose value was changed. */
221 bitmap relevant;
223 /* The maximum invariant id. */
224 unsigned max_inv_id;
226 /* The uses of induction variables. */
227 VEC(iv_use_p,heap) *iv_uses;
229 /* The candidates. */
230 VEC(iv_cand_p,heap) *iv_candidates;
232 /* A bitmap of important candidates. */
233 bitmap important_candidates;
235 /* Whether to consider just related and important candidates when replacing a
236 use. */
237 bool consider_all_candidates;
240 /* An assignment of iv candidates to uses. */
242 struct iv_ca
244 /* The number of uses covered by the assignment. */
245 unsigned upto;
247 /* Number of uses that cannot be expressed by the candidates in the set. */
248 unsigned bad_uses;
250 /* Candidate assigned to a use, together with the related costs. */
251 struct cost_pair **cand_for_use;
253 /* Number of times each candidate is used. */
254 unsigned *n_cand_uses;
256 /* The candidates used. */
257 bitmap cands;
259 /* The number of candidates in the set. */
260 unsigned n_cands;
262 /* Total number of registers needed. */
263 unsigned n_regs;
265 /* Total cost of expressing uses. */
266 unsigned cand_use_cost;
268 /* Total cost of candidates. */
269 unsigned cand_cost;
271 /* Number of times each invariant is used. */
272 unsigned *n_invariant_uses;
274 /* Total cost of the assignment. */
275 unsigned cost;
278 /* Difference of two iv candidate assignments. */
280 struct iv_ca_delta
282 /* Changed use. */
283 struct iv_use *use;
285 /* An old assignment (for rollback purposes). */
286 struct cost_pair *old_cp;
288 /* A new assignment. */
289 struct cost_pair *new_cp;
291 /* Next change in the list. */
292 struct iv_ca_delta *next_change;
295 /* Bound on number of candidates below which all candidates are considered. */
297 #define CONSIDER_ALL_CANDIDATES_BOUND \
298 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
300 /* If there are more iv occurrences, we just give up (it is quite unlikely that
301 optimizing such a loop would help, and it would take ages). */
303 #define MAX_CONSIDERED_USES \
304 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
306 /* If there are at most this number of ivs in the set, always try removing
307 unnecessary ivs from the set. */
309 #define ALWAYS_PRUNE_CAND_SET_BOUND \
310 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
312 /* The list of trees for which the decl_rtl field must be reset is stored
313 here. */
315 static VEC(tree,heap) *decl_rtl_to_reset;
317 /* Number of uses recorded in DATA. */
319 static inline unsigned
320 n_iv_uses (struct ivopts_data *data)
322 return VEC_length (iv_use_p, data->iv_uses);
325 /* Ith use recorded in DATA. */
327 static inline struct iv_use *
328 iv_use (struct ivopts_data *data, unsigned i)
330 return VEC_index (iv_use_p, data->iv_uses, i);
333 /* Number of candidates recorded in DATA. */
335 static inline unsigned
336 n_iv_cands (struct ivopts_data *data)
338 return VEC_length (iv_cand_p, data->iv_candidates);
341 /* Ith candidate recorded in DATA. */
343 static inline struct iv_cand *
344 iv_cand (struct ivopts_data *data, unsigned i)
346 return VEC_index (iv_cand_p, data->iv_candidates, i);
349 /* The data for LOOP. */
351 static inline struct loop_data *
352 loop_data (struct loop *loop)
354 return loop->aux;
357 /* The single loop exit if it dominates the latch, NULL otherwise. */
359 static edge
360 single_dom_exit (struct loop *loop)
362 edge exit = loop->single_exit;
364 if (!exit)
365 return NULL;
367 if (!just_once_each_iteration_p (loop, exit->src))
368 return NULL;
370 return exit;
373 /* Dumps information about the induction variable IV to FILE. */
375 extern void dump_iv (FILE *, struct iv *);
376 void
377 dump_iv (FILE *file, struct iv *iv)
379 if (iv->ssa_name)
381 fprintf (file, "ssa name ");
382 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
383 fprintf (file, "\n");
386 fprintf (file, " type ");
387 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
388 fprintf (file, "\n");
390 if (iv->step)
392 fprintf (file, " base ");
393 print_generic_expr (file, iv->base, TDF_SLIM);
394 fprintf (file, "\n");
396 fprintf (file, " step ");
397 print_generic_expr (file, iv->step, TDF_SLIM);
398 fprintf (file, "\n");
400 else
402 fprintf (file, " invariant ");
403 print_generic_expr (file, iv->base, TDF_SLIM);
404 fprintf (file, "\n");
407 if (iv->base_object)
409 fprintf (file, " base object ");
410 print_generic_expr (file, iv->base_object, TDF_SLIM);
411 fprintf (file, "\n");
414 if (iv->biv_p)
415 fprintf (file, " is a biv\n");
418 /* Dumps information about the USE to FILE. */
420 extern void dump_use (FILE *, struct iv_use *);
421 void
422 dump_use (FILE *file, struct iv_use *use)
424 fprintf (file, "use %d\n", use->id);
426 switch (use->type)
428 case USE_NONLINEAR_EXPR:
429 fprintf (file, " generic\n");
430 break;
432 case USE_OUTER:
433 fprintf (file, " outside\n");
434 break;
436 case USE_ADDRESS:
437 fprintf (file, " address\n");
438 break;
440 case USE_COMPARE:
441 fprintf (file, " compare\n");
442 break;
444 default:
445 gcc_unreachable ();
448 fprintf (file, " in statement ");
449 print_generic_expr (file, use->stmt, TDF_SLIM);
450 fprintf (file, "\n");
452 fprintf (file, " at position ");
453 if (use->op_p)
454 print_generic_expr (file, *use->op_p, TDF_SLIM);
455 fprintf (file, "\n");
457 dump_iv (file, use->iv);
459 if (use->related_cands)
461 fprintf (file, " related candidates ");
462 dump_bitmap (file, use->related_cands);
466 /* Dumps information about the uses to FILE. */
468 extern void dump_uses (FILE *, struct ivopts_data *);
469 void
470 dump_uses (FILE *file, struct ivopts_data *data)
472 unsigned i;
473 struct iv_use *use;
475 for (i = 0; i < n_iv_uses (data); i++)
477 use = iv_use (data, i);
479 dump_use (file, use);
480 fprintf (file, "\n");
484 /* Dumps information about induction variable candidate CAND to FILE. */
486 extern void dump_cand (FILE *, struct iv_cand *);
487 void
488 dump_cand (FILE *file, struct iv_cand *cand)
490 struct iv *iv = cand->iv;
492 fprintf (file, "candidate %d%s\n",
493 cand->id, cand->important ? " (important)" : "");
495 if (!iv)
497 fprintf (file, " final value replacement\n");
498 return;
501 switch (cand->pos)
503 case IP_NORMAL:
504 fprintf (file, " incremented before exit test\n");
505 break;
507 case IP_END:
508 fprintf (file, " incremented at end\n");
509 break;
511 case IP_ORIGINAL:
512 fprintf (file, " original biv\n");
513 break;
516 dump_iv (file, iv);
519 /* Returns the info for ssa version VER. */
521 static inline struct version_info *
522 ver_info (struct ivopts_data *data, unsigned ver)
524 return data->version_info + ver;
527 /* Returns the info for ssa name NAME. */
529 static inline struct version_info *
530 name_info (struct ivopts_data *data, tree name)
532 return ver_info (data, SSA_NAME_VERSION (name));
535 /* Checks whether there exists a number X such that X * B = A, counting modulo
536 2^BITS. */
538 static bool
539 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
540 HOST_WIDE_INT *x)
542 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
543 unsigned HOST_WIDE_INT inv, ex, val;
544 unsigned i;
546 a &= mask;
547 b &= mask;
549 /* First divide the whole equation by 2 as long as possible. */
550 while (!(a & 1) && !(b & 1))
552 a >>= 1;
553 b >>= 1;
554 bits--;
555 mask >>= 1;
558 if (!(b & 1))
560 /* If b is still even, a is odd and there is no such x. */
561 return false;
564 /* Find the inverse of b. We compute it as
565 b^(2^(bits - 1) - 1) (mod 2^bits). */
566 inv = 1;
567 ex = b;
568 for (i = 0; i < bits - 1; i++)
570 inv = (inv * ex) & mask;
571 ex = (ex * ex) & mask;
574 val = (a * inv) & mask;
576 gcc_assert (((val * b) & mask) == a);
578 if ((val >> (bits - 1)) & 1)
579 val |= ~mask;
581 *x = val;
583 return true;
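/* A worked example added for exposition (hypothetical call, not from the
   original sources): divide (4, 6, 3, &x) asks for x with 3 * x == 6 (mod 16).
   The halving loop does not run, since 6 is even but 3 is odd; the inverse of
   3 is computed as 3^(2^3 - 1) = 3^7 == 11 (mod 16), because 3 * 11 == 33 == 1
   (mod 16); hence x = 6 * 11 == 66 == 2 (mod 16), and indeed 3 * 2 == 6.  */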
586 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
587 emitted in LOOP. */
589 static bool
590 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
592 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
594 gcc_assert (bb);
596 if (sbb == loop->latch)
597 return true;
599 if (sbb != bb)
600 return false;
602 return stmt == last_stmt (bb);
605 /* Returns true if STMT is after the place where the original induction
606 variable CAND is incremented. */
608 static bool
609 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
611 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
612 basic_block stmt_bb = bb_for_stmt (stmt);
613 block_stmt_iterator bsi;
615 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
616 return false;
618 if (stmt_bb != cand_bb)
619 return true;
621 /* Scan the block from the end, since the original ivs are usually
622 incremented at the end of the loop body. */
623 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
625 if (bsi_stmt (bsi) == cand->incremented_at)
626 return false;
627 if (bsi_stmt (bsi) == stmt)
628 return true;
632 /* Returns true if STMT is after the place where the induction variable
633 CAND is incremented in LOOP. */
635 static bool
636 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
638 switch (cand->pos)
640 case IP_END:
641 return false;
643 case IP_NORMAL:
644 return stmt_after_ip_normal_pos (loop, stmt);
646 case IP_ORIGINAL:
647 return stmt_after_ip_original_pos (cand, stmt);
649 default:
650 gcc_unreachable ();
654 /* Element of the table in which we cache the numbers of iterations obtained
655 from exits of the loop. */
657 struct nfe_cache_elt
659 /* The edge for which the number of iterations is cached. */
660 edge exit;
662 /* True if the # of iterations was successfully determined. */
663 bool valid_p;
665 /* Description of # of iterations. */
666 struct tree_niter_desc niter;
669 /* Hash function for nfe_cache_elt E. */
671 static hashval_t
672 nfe_hash (const void *e)
674 const struct nfe_cache_elt *elt = e;
676 return htab_hash_pointer (elt->exit);
679 /* Equality function for nfe_cache_elt E1 and edge E2. */
681 static int
682 nfe_eq (const void *e1, const void *e2)
684 const struct nfe_cache_elt *elt1 = e1;
686 return elt1->exit == e2;
689 /* Returns structure describing number of iterations determined from
690 EXIT of DATA->current_loop, or NULL if something goes wrong. */
692 static struct tree_niter_desc *
693 niter_for_exit (struct ivopts_data *data, edge exit)
695 struct nfe_cache_elt *nfe_desc;
696 PTR *slot;
698 slot = htab_find_slot_with_hash (data->niters, exit,
699 htab_hash_pointer (exit),
700 INSERT);
702 if (!*slot)
704 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
705 nfe_desc->exit = exit;
706 nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
707 exit, &nfe_desc->niter);
708 *slot = nfe_desc;
710 else
711 nfe_desc = *slot;
713 if (!nfe_desc->valid_p)
714 return NULL;
716 return &nfe_desc->niter;
719 /* Returns structure describing number of iterations determined from
720 single dominating exit of DATA->current_loop, or NULL if something
721 goes wrong. */
723 static struct tree_niter_desc *
724 niter_for_single_dom_exit (struct ivopts_data *data)
726 edge exit = single_dom_exit (data->current_loop);
728 if (!exit)
729 return NULL;
731 return niter_for_exit (data, exit);
734 /* Initializes data structures used by the iv optimization pass, stored
735 in DATA. LOOPS is the loop tree. */
737 static void
738 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
740 unsigned i;
742 data->version_info_size = 2 * num_ssa_names;
743 data->version_info = xcalloc (data->version_info_size,
744 sizeof (struct version_info));
745 data->relevant = BITMAP_ALLOC (NULL);
746 data->important_candidates = BITMAP_ALLOC (NULL);
747 data->max_inv_id = 0;
748 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
750 for (i = 1; i < loops->num; i++)
751 if (loops->parray[i])
752 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
754 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
755 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
756 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
759 /* Returns a memory object to which EXPR points. In case we are able to
760 determine that it does not point to any such object, NULL is returned. */
762 static tree
763 determine_base_object (tree expr)
765 enum tree_code code = TREE_CODE (expr);
766 tree base, obj, op0, op1;
768 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
769 return NULL_TREE;
771 switch (code)
773 case INTEGER_CST:
774 return NULL_TREE;
776 case ADDR_EXPR:
777 obj = TREE_OPERAND (expr, 0);
778 base = get_base_address (obj);
780 if (!base)
781 return expr;
783 if (TREE_CODE (base) == INDIRECT_REF)
784 return determine_base_object (TREE_OPERAND (base, 0));
786 return fold (build1 (ADDR_EXPR, ptr_type_node, base));
788 case PLUS_EXPR:
789 case MINUS_EXPR:
790 op0 = determine_base_object (TREE_OPERAND (expr, 0));
791 op1 = determine_base_object (TREE_OPERAND (expr, 1));
793 if (!op1)
794 return op0;
796 if (!op0)
797 return (code == PLUS_EXPR
798 ? op1
799 : fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));
801 return fold (build (code, ptr_type_node, op0, op1));
803 case NOP_EXPR:
804 case CONVERT_EXPR:
805 return determine_base_object (TREE_OPERAND (expr, 0));
807 default:
808 return fold_convert (ptr_type_node, expr);
812 /* Allocates an induction variable with given initial value BASE and step STEP
813 for loop LOOP. */
815 static struct iv *
816 alloc_iv (tree base, tree step)
818 struct iv *iv = xcalloc (1, sizeof (struct iv));
820 if (step && integer_zerop (step))
821 step = NULL_TREE;
823 iv->base = base;
824 iv->base_object = determine_base_object (base);
825 iv->step = step;
826 iv->biv_p = false;
827 iv->have_use_for = false;
828 iv->use_id = 0;
829 iv->ssa_name = NULL_TREE;
831 return iv;
834 /* Sets STEP and BASE for induction variable IV. */
836 static void
837 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
839 struct version_info *info = name_info (data, iv);
841 gcc_assert (!info->iv);
843 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
844 info->iv = alloc_iv (base, step);
845 info->iv->ssa_name = iv;
848 /* Finds induction variable declaration for VAR. */
850 static struct iv *
851 get_iv (struct ivopts_data *data, tree var)
853 basic_block bb;
855 if (!name_info (data, var)->iv)
857 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
859 if (!bb
860 || !flow_bb_inside_loop_p (data->current_loop, bb))
861 set_iv (data, var, var, NULL_TREE);
864 return name_info (data, var)->iv;
867 /* Determines the step of a biv defined in PHI. */
869 static tree
870 determine_biv_step (tree phi)
872 struct loop *loop = bb_for_stmt (phi)->loop_father;
873 tree name = PHI_RESULT (phi), base, step;
874 tree type = TREE_TYPE (name);
876 if (!is_gimple_reg (name))
877 return NULL_TREE;
879 if (!simple_iv (loop, phi, name, &base, &step))
880 return NULL_TREE;
882 if (!step)
883 return build_int_cst (type, 0);
885 return step;
888 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
890 static bool
891 abnormal_ssa_name_p (tree exp)
893 if (!exp)
894 return false;
896 if (TREE_CODE (exp) != SSA_NAME)
897 return false;
899 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
902 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
903 abnormal phi node. Callback for for_each_index. */
905 static bool
906 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
907 void *data ATTRIBUTE_UNUSED)
909 if (TREE_CODE (base) == ARRAY_REF)
911 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
912 return false;
913 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
914 return false;
917 return !abnormal_ssa_name_p (*index);
920 /* Returns true if EXPR contains a ssa name that occurs in an
921 abnormal phi node. */
923 static bool
924 contains_abnormal_ssa_name_p (tree expr)
926 enum tree_code code = TREE_CODE (expr);
927 enum tree_code_class class = TREE_CODE_CLASS (code);
929 if (code == SSA_NAME)
930 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
932 if (code == INTEGER_CST
933 || is_gimple_min_invariant (expr))
934 return false;
936 if (code == ADDR_EXPR)
937 return !for_each_index (&TREE_OPERAND (expr, 0),
938 idx_contains_abnormal_ssa_name_p,
939 NULL);
941 switch (class)
943 case tcc_binary:
944 case tcc_comparison:
945 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
946 return true;
948 /* Fallthru. */
949 case tcc_unary:
950 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
951 return true;
953 break;
955 default:
956 gcc_unreachable ();
959 return false;
962 /* Finds basic ivs. */
964 static bool
965 find_bivs (struct ivopts_data *data)
967 tree phi, step, type, base;
968 bool found = false;
969 struct loop *loop = data->current_loop;
971 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
973 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
974 continue;
976 step = determine_biv_step (phi);
978 if (!step)
979 continue;
980 if (cst_and_fits_in_hwi (step)
981 && int_cst_value (step) == 0)
982 continue;
984 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
985 if (contains_abnormal_ssa_name_p (base))
986 continue;
988 type = TREE_TYPE (PHI_RESULT (phi));
989 base = fold_convert (type, base);
990 step = fold_convert (type, step);
992 /* FIXME: We do not handle induction variables whose step does
993 not satisfy cst_and_fits_in_hwi. */
994 if (!cst_and_fits_in_hwi (step))
995 continue;
997 set_iv (data, PHI_RESULT (phi), base, step);
998 found = true;
1001 return found;
1004 /* Marks basic ivs. */
1006 static void
1007 mark_bivs (struct ivopts_data *data)
1009 tree phi, var;
1010 struct iv *iv, *incr_iv;
1011 struct loop *loop = data->current_loop;
1012 basic_block incr_bb;
1014 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1016 iv = get_iv (data, PHI_RESULT (phi));
1017 if (!iv)
1018 continue;
1020 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1021 incr_iv = get_iv (data, var);
1022 if (!incr_iv)
1023 continue;
1025 /* If the increment is in the subloop, ignore it. */
1026 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1027 if (incr_bb->loop_father != data->current_loop
1028 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1029 continue;
1031 iv->biv_p = true;
1032 incr_iv->biv_p = true;
1036 /* Checks whether STMT defines a linear induction variable and stores its
1037 parameters to BASE and STEP. */
1039 static bool
1040 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1041 tree *base, tree *step)
1043 tree lhs;
1044 struct loop *loop = data->current_loop;
1046 *base = NULL_TREE;
1047 *step = NULL_TREE;
1049 if (TREE_CODE (stmt) != MODIFY_EXPR)
1050 return false;
1052 lhs = TREE_OPERAND (stmt, 0);
1053 if (TREE_CODE (lhs) != SSA_NAME)
1054 return false;
1056 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
1057 return false;
1059 /* FIXME: We do not handle induction variables whose step does
1060 not satisfy cst_and_fits_in_hwi. */
1061 if (!zero_p (*step)
1062 && !cst_and_fits_in_hwi (*step))
1063 return false;
1065 if (contains_abnormal_ssa_name_p (*base))
1066 return false;
1068 return true;
1071 /* Finds general ivs in statement STMT. */
1073 static void
1074 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1076 tree base, step;
1078 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1079 return;
1081 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1084 /* Finds general ivs in basic block BB. */
1086 static void
1087 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1089 block_stmt_iterator bsi;
1091 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1092 find_givs_in_stmt (data, bsi_stmt (bsi));
1095 /* Finds general ivs. */
1097 static void
1098 find_givs (struct ivopts_data *data)
1100 struct loop *loop = data->current_loop;
1101 basic_block *body = get_loop_body_in_dom_order (loop);
1102 unsigned i;
1104 for (i = 0; i < loop->num_nodes; i++)
1105 find_givs_in_bb (data, body[i]);
1106 free (body);
1109 /* For each ssa name defined in LOOP determines whether it is an induction
1110 variable and if so, its initial value and step. */
1112 static bool
1113 find_induction_variables (struct ivopts_data *data)
1115 unsigned i;
1116 bitmap_iterator bi;
1118 if (!find_bivs (data))
1119 return false;
1121 find_givs (data);
1122 mark_bivs (data);
1124 if (dump_file && (dump_flags & TDF_DETAILS))
1126 struct tree_niter_desc *niter;
1128 niter = niter_for_single_dom_exit (data);
1130 if (niter)
1132 fprintf (dump_file, " number of iterations ");
1133 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1134 fprintf (dump_file, "\n");
1136 fprintf (dump_file, " may be zero if ");
1137 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1138 fprintf (dump_file, "\n");
1139 fprintf (dump_file, "\n");
1142 fprintf (dump_file, "Induction variables:\n\n");
1144 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1146 if (ver_info (data, i)->iv)
1147 dump_iv (dump_file, ver_info (data, i)->iv);
1151 return true;
1154 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1156 static struct iv_use *
1157 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1158 tree stmt, enum use_type use_type)
1160 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1162 use->id = n_iv_uses (data);
1163 use->type = use_type;
1164 use->iv = iv;
1165 use->stmt = stmt;
1166 use->op_p = use_p;
1167 use->related_cands = BITMAP_ALLOC (NULL);
1169 /* Reset the ssa name, so that it is not shown in the dumps, if it was not
1170 already reset by the caller. */
1171 iv->ssa_name = NULL_TREE;
1173 if (dump_file && (dump_flags & TDF_DETAILS))
1174 dump_use (dump_file, use);
1176 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1178 return use;
1181 /* Checks whether OP is a loop-level invariant and if so, records it.
1182 NONLINEAR_USE is true if the invariant is used in a way we do not
1183 handle specially. */
1185 static void
1186 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1188 basic_block bb;
1189 struct version_info *info;
1191 if (TREE_CODE (op) != SSA_NAME
1192 || !is_gimple_reg (op))
1193 return;
1195 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1196 if (bb
1197 && flow_bb_inside_loop_p (data->current_loop, bb))
1198 return;
1200 info = name_info (data, op);
1201 info->name = op;
1202 info->has_nonlin_use |= nonlinear_use;
1203 if (!info->inv_id)
1204 info->inv_id = ++data->max_inv_id;
1205 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1208 /* Checks whether the use OP is interesting and if so, records it
1209 as TYPE. */
1211 static struct iv_use *
1212 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1213 enum use_type type)
1215 struct iv *iv;
1216 struct iv *civ;
1217 tree stmt;
1218 struct iv_use *use;
1220 if (TREE_CODE (op) != SSA_NAME)
1221 return NULL;
1223 iv = get_iv (data, op);
1224 if (!iv)
1225 return NULL;
1227 if (iv->have_use_for)
1229 use = iv_use (data, iv->use_id);
1231 gcc_assert (use->type == USE_NONLINEAR_EXPR
1232 || use->type == USE_OUTER);
1234 if (type == USE_NONLINEAR_EXPR)
1235 use->type = USE_NONLINEAR_EXPR;
1236 return use;
1239 if (zero_p (iv->step))
1241 record_invariant (data, op, true);
1242 return NULL;
1244 iv->have_use_for = true;
1246 civ = xmalloc (sizeof (struct iv));
1247 *civ = *iv;
1249 stmt = SSA_NAME_DEF_STMT (op);
1250 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1251 || TREE_CODE (stmt) == MODIFY_EXPR);
1253 use = record_use (data, NULL, civ, stmt, type);
1254 iv->use_id = use->id;
1256 return use;
1259 /* Checks whether the use OP is interesting and if so, records it. */
1261 static struct iv_use *
1262 find_interesting_uses_op (struct ivopts_data *data, tree op)
1264 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1267 /* Records a definition of induction variable OP that is used outside of the
1268 loop. */
1270 static struct iv_use *
1271 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1273 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1276 /* Checks whether the condition *COND_P in STMT is interesting
1277 and if so, records it. */
1279 static void
1280 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1282 tree *op0_p;
1283 tree *op1_p;
1284 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1285 struct iv const_iv;
1286 tree zero = integer_zero_node;
1288 const_iv.step = NULL_TREE;
1290 if (TREE_CODE (*cond_p) != SSA_NAME
1291 && !COMPARISON_CLASS_P (*cond_p))
1292 return;
1294 if (TREE_CODE (*cond_p) == SSA_NAME)
1296 op0_p = cond_p;
1297 op1_p = &zero;
1299 else
1301 op0_p = &TREE_OPERAND (*cond_p, 0);
1302 op1_p = &TREE_OPERAND (*cond_p, 1);
1305 if (TREE_CODE (*op0_p) == SSA_NAME)
1306 iv0 = get_iv (data, *op0_p);
1307 else
1308 iv0 = &const_iv;
1310 if (TREE_CODE (*op1_p) == SSA_NAME)
1311 iv1 = get_iv (data, *op1_p);
1312 else
1313 iv1 = &const_iv;
1315 if (/* When comparing with a non-invariant value, we cannot do any useful
1316 induction variable elimination. */
1317 (!iv0 || !iv1)
1318 /* Eliminating condition based on two ivs would be nontrivial.
1319 ??? TODO -- it is not really important to handle this case. */
1320 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1322 find_interesting_uses_op (data, *op0_p);
1323 find_interesting_uses_op (data, *op1_p);
1324 return;
1327 if (zero_p (iv0->step) && zero_p (iv1->step))
1329 /* If both are invariants, this is work for unswitching. */
1330 return;
1333 civ = xmalloc (sizeof (struct iv));
1334 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1335 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1338 /* Returns true if expression EXPR is obviously invariant in LOOP,
1339 i.e. if all its operands are defined outside of the LOOP. */
1341 bool
1342 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1344 basic_block def_bb;
1345 unsigned i, len;
1347 if (is_gimple_min_invariant (expr))
1348 return true;
1350 if (TREE_CODE (expr) == SSA_NAME)
1352 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1353 if (def_bb
1354 && flow_bb_inside_loop_p (loop, def_bb))
1355 return false;
1357 return true;
1360 if (!EXPR_P (expr))
1361 return false;
1363 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1364 for (i = 0; i < len; i++)
1365 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1366 return false;
1368 return true;
1371 /* Accumulates the steps of indices into DATA and replaces their values with the
1372 initial ones. Returns false when the value of the index cannot be determined.
1373 Callback for for_each_index. */
1375 struct ifs_ivopts_data
1377 struct ivopts_data *ivopts_data;
1378 tree stmt;
1379 tree *step_p;
1382 static bool
1383 idx_find_step (tree base, tree *idx, void *data)
1385 struct ifs_ivopts_data *dta = data;
1386 struct iv *iv;
1387 tree step, type, iv_type, iv_step, lbound, off;
1388 struct loop *loop = dta->ivopts_data->current_loop;
1390 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1391 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1392 return false;
1394 /* If base is a component ref, require that the offset of the reference
1395 be invariant. */
1396 if (TREE_CODE (base) == COMPONENT_REF)
1398 off = component_ref_field_offset (base);
1399 return expr_invariant_in_loop_p (loop, off);
1402 /* If base is an array, first check whether we will be able to move the
1403 reference out of the loop (in order to take its address in strength
1404 reduction). In order for this to work we need both lower bound
1405 and step to be loop invariants. */
1406 if (TREE_CODE (base) == ARRAY_REF)
1408 step = array_ref_element_size (base);
1409 lbound = array_ref_low_bound (base);
1411 if (!expr_invariant_in_loop_p (loop, step)
1412 || !expr_invariant_in_loop_p (loop, lbound))
1413 return false;
1416 if (TREE_CODE (*idx) != SSA_NAME)
1417 return true;
1419 iv = get_iv (dta->ivopts_data, *idx);
1420 if (!iv)
1421 return false;
1423 *idx = iv->base;
1425 if (!iv->step)
1426 return true;
1428 iv_type = TREE_TYPE (iv->base);
1429 type = build_pointer_type (TREE_TYPE (base));
1430 if (TREE_CODE (base) == ARRAY_REF)
1432 step = array_ref_element_size (base);
1434 /* We only handle addresses whose step is an integer constant. */
1435 if (TREE_CODE (step) != INTEGER_CST)
1436 return false;
1438 else
1439 /* The step for pointer arithmetic is already 1 byte. */
1440 step = build_int_cst (type, 1);
1442 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1443 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1444 type, iv->base, iv->step, dta->stmt);
1445 else
1446 iv_step = fold_convert (iv_type, iv->step);
1448 if (!iv_step)
1450 /* The index might wrap. */
1451 return false;
1454 step = fold_binary_to_constant (MULT_EXPR, type, step, iv_step);
1456 if (!*dta->step_p)
1457 *dta->step_p = step;
1458 else
1459 *dta->step_p = fold_binary_to_constant (PLUS_EXPR, type,
1460 *dta->step_p, step);
1462 return true;
1465 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1466 object is passed to it in DATA. */
1468 static bool
1469 idx_record_use (tree base, tree *idx,
1470 void *data)
1472 find_interesting_uses_op (data, *idx);
1473 if (TREE_CODE (base) == ARRAY_REF)
1475 find_interesting_uses_op (data, array_ref_element_size (base));
1476 find_interesting_uses_op (data, array_ref_low_bound (base));
1478 return true;
1481 /* Returns true if memory reference REF may be unaligned. */
1483 static bool
1484 may_be_unaligned_p (tree ref)
1486 tree base;
1487 tree base_type;
1488 HOST_WIDE_INT bitsize;
1489 HOST_WIDE_INT bitpos;
1490 tree toffset;
1491 enum machine_mode mode;
1492 int unsignedp, volatilep;
1493 unsigned base_align;
1495 /* The test below is basically a copy of what expr.c:normal_inner_ref
1496 does to check whether the object must be loaded by parts when
1497 STRICT_ALIGNMENT is true. */
1498 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1499 &unsignedp, &volatilep, true);
1500 base_type = TREE_TYPE (base);
1501 base_align = TYPE_ALIGN (base_type);
1503 if (mode != BLKmode
1504 && (base_align < GET_MODE_ALIGNMENT (mode)
1505 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1506 || bitpos % BITS_PER_UNIT != 0))
1507 return true;
1509 return false;
1512 /* Finds addresses in *OP_P inside STMT. */
1514 static void
1515 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1517 tree base = unshare_expr (*op_p), step = NULL;
1518 struct iv *civ;
1519 struct ifs_ivopts_data ifs_ivopts_data;
1521 /* Do not play with volatile memory references. A bit too conservative,
1522 perhaps, but safe. */
1523 if (stmt_ann (stmt)->has_volatile_ops)
1524 goto fail;
1526 /* Ignore bitfields for now. Not really something terribly complicated
1527 to handle. TODO. */
1528 if (TREE_CODE (base) == COMPONENT_REF
1529 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1530 goto fail;
1532 if (STRICT_ALIGNMENT
1533 && may_be_unaligned_p (base))
1534 goto fail;
1536 ifs_ivopts_data.ivopts_data = data;
1537 ifs_ivopts_data.stmt = stmt;
1538 ifs_ivopts_data.step_p = &step;
1539 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1540 || zero_p (step))
1541 goto fail;
1543 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1544 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1546 if (TREE_CODE (base) == INDIRECT_REF)
1547 base = TREE_OPERAND (base, 0);
1548 else
1549 base = build_addr (base);
1551 civ = alloc_iv (base, step);
1552 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1553 return;
1555 fail:
1556 for_each_index (op_p, idx_record_use, data);
1559 /* Finds and records invariants used in STMT. */
1561 static void
1562 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1564 use_optype uses = NULL;
1565 unsigned i, n;
1566 tree op;
1568 if (TREE_CODE (stmt) == PHI_NODE)
1569 n = PHI_NUM_ARGS (stmt);
1570 else
1572 uses = STMT_USE_OPS (stmt);
1573 n = NUM_USES (uses);
1576 for (i = 0; i < n; i++)
1578 if (TREE_CODE (stmt) == PHI_NODE)
1579 op = PHI_ARG_DEF (stmt, i);
1580 else
1581 op = USE_OP (uses, i);
1583 record_invariant (data, op, false);
1587 /* Finds interesting uses of induction variables in the statement STMT. */
1589 static void
1590 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1592 struct iv *iv;
1593 tree op, lhs, rhs;
1594 use_optype uses = NULL;
1595 unsigned i, n;
1597 find_invariants_stmt (data, stmt);
1599 if (TREE_CODE (stmt) == COND_EXPR)
1601 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1602 return;
1605 if (TREE_CODE (stmt) == MODIFY_EXPR)
1607 lhs = TREE_OPERAND (stmt, 0);
1608 rhs = TREE_OPERAND (stmt, 1);
1610 if (TREE_CODE (lhs) == SSA_NAME)
1612 /* If the statement defines an induction variable, the uses are not
1613 interesting by themselves. */
1615 iv = get_iv (data, lhs);
1617 if (iv && !zero_p (iv->step))
1618 return;
1621 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1623 case tcc_comparison:
1624 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1625 return;
1627 case tcc_reference:
1628 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1629 if (REFERENCE_CLASS_P (lhs))
1630 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1631 return;
1633 default: ;
1636 if (REFERENCE_CLASS_P (lhs)
1637 && is_gimple_val (rhs))
1639 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1640 find_interesting_uses_op (data, rhs);
1641 return;
1644 /* TODO -- we should also handle address uses of type
1646 memory = call (whatever);
1650 call (memory). */
1653 if (TREE_CODE (stmt) == PHI_NODE
1654 && bb_for_stmt (stmt) == data->current_loop->header)
1656 lhs = PHI_RESULT (stmt);
1657 iv = get_iv (data, lhs);
1659 if (iv && !zero_p (iv->step))
1660 return;
1663 if (TREE_CODE (stmt) == PHI_NODE)
1664 n = PHI_NUM_ARGS (stmt);
1665 else
1667 uses = STMT_USE_OPS (stmt);
1668 n = NUM_USES (uses);
1671 for (i = 0; i < n; i++)
1673 if (TREE_CODE (stmt) == PHI_NODE)
1674 op = PHI_ARG_DEF (stmt, i);
1675 else
1676 op = USE_OP (uses, i);
1678 if (TREE_CODE (op) != SSA_NAME)
1679 continue;
1681 iv = get_iv (data, op);
1682 if (!iv)
1683 continue;
1685 find_interesting_uses_op (data, op);
1689 /* Finds interesting uses of induction variables outside of loops
1690 on loop exit edge EXIT. */
1692 static void
1693 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1695 tree phi, def;
1697 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1699 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1700 find_interesting_uses_outer (data, def);
1704 /* Finds uses of the induction variables that are interesting. */
1706 static void
1707 find_interesting_uses (struct ivopts_data *data)
1709 basic_block bb;
1710 block_stmt_iterator bsi;
1711 tree phi;
1712 basic_block *body = get_loop_body (data->current_loop);
1713 unsigned i;
1714 struct version_info *info;
1715 edge e;
1717 if (dump_file && (dump_flags & TDF_DETAILS))
1718 fprintf (dump_file, "Uses:\n\n");
1720 for (i = 0; i < data->current_loop->num_nodes; i++)
1722 edge_iterator ei;
1723 bb = body[i];
1725 FOR_EACH_EDGE (e, ei, bb->succs)
1726 if (e->dest != EXIT_BLOCK_PTR
1727 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1728 find_interesting_uses_outside (data, e);
1730 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1731 find_interesting_uses_stmt (data, phi);
1732 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1733 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1736 if (dump_file && (dump_flags & TDF_DETAILS))
1738 bitmap_iterator bi;
1740 fprintf (dump_file, "\n");
1742 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1744 info = ver_info (data, i);
1745 if (info->inv_id)
1747 fprintf (dump_file, " ");
1748 print_generic_expr (dump_file, info->name, TDF_SLIM);
1749 fprintf (dump_file, " is invariant (%d)%s\n",
1750 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1754 fprintf (dump_file, "\n");
1757 free (body);
1760 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1761 is true, assume we are inside an address. */
1763 static tree
1764 strip_offset (tree expr, bool inside_addr, unsigned HOST_WIDE_INT *offset)
1766 tree op0 = NULL_TREE, op1 = NULL_TREE, step;
1767 enum tree_code code;
1768 tree type, orig_type = TREE_TYPE (expr);
1769 unsigned HOST_WIDE_INT off0, off1, st;
1770 tree orig_expr = expr;
1772 STRIP_NOPS (expr);
1773 type = TREE_TYPE (expr);
1774 code = TREE_CODE (expr);
1775 *offset = 0;
1777 switch (code)
1779 case INTEGER_CST:
1780 if (!cst_and_fits_in_hwi (expr)
1781 || zero_p (expr))
1782 return orig_expr;
1784 *offset = int_cst_value (expr);
1785 return build_int_cst_type (orig_type, 0);
1787 case PLUS_EXPR:
1788 case MINUS_EXPR:
1789 op0 = TREE_OPERAND (expr, 0);
1790 op1 = TREE_OPERAND (expr, 1);
1792 op0 = strip_offset (op0, false, &off0);
1793 op1 = strip_offset (op1, false, &off1);
1795 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1796 if (op0 == TREE_OPERAND (expr, 0)
1797 && op1 == TREE_OPERAND (expr, 1))
1798 return orig_expr;
1800 if (zero_p (op1))
1801 expr = op0;
1802 else if (zero_p (op0))
1804 if (code == PLUS_EXPR)
1805 expr = op1;
1806 else
1807 expr = build1 (NEGATE_EXPR, type, op1);
1809 else
1810 expr = build2 (code, type, op0, op1);
1812 return fold_convert (orig_type, expr);
1814 case ARRAY_REF:
1815 if (!inside_addr)
1816 return orig_expr;
1818 step = array_ref_element_size (expr);
1819 if (!cst_and_fits_in_hwi (step))
1820 break;
1822 st = int_cst_value (step);
1823 op1 = TREE_OPERAND (expr, 1);
1824 op1 = strip_offset (op1, false, &off1);
1825 *offset = off1 * st;
1826 break;
1828 case COMPONENT_REF:
1829 if (!inside_addr)
1830 return orig_expr;
1831 break;
1833 case ADDR_EXPR:
1834 inside_addr = true;
1835 break;
1837 default:
1838 return orig_expr;
1841 /* Default handling of expressions for which we want to recurse into
1842 the first operand. */
1843 op0 = TREE_OPERAND (expr, 0);
1844 op0 = strip_offset (op0, inside_addr, &off0);
1845 *offset += off0;
1847 if (op0 == TREE_OPERAND (expr, 0)
1848 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1849 return orig_expr;
1851 expr = copy_node (expr);
1852 TREE_OPERAND (expr, 0) = op0;
1853 if (op1)
1854 TREE_OPERAND (expr, 1) = op1;
1856 return fold_convert (orig_type, expr);
1859 /* Returns a variant of TYPE that can be used as a base for different uses.
1860 For integer types, we return the unsigned variant of the type, which
1861 avoids problems with overflows. For pointer types, we return void *. */
1863 static tree
1864 generic_type_for (tree type)
1866 if (POINTER_TYPE_P (type))
1867 return ptr_type_node;
1869 if (TYPE_UNSIGNED (type))
1870 return type;
1872 return unsigned_type_for (type);
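/* For exposition (not in the original sources): with this mapping, an iv
   candidate over "int" is built in "unsigned int", so that incrementing it
   cannot cause undefined signed overflow, and any pointer-typed base is
   rebuilt over ptr_type_node, i.e. void *.  */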
1875 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1876 position to POS. If USE is not NULL, the candidate is set as related to
1877 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1878 replacement of the final value of the iv by a direct computation. */
1880 static struct iv_cand *
1881 add_candidate_1 (struct ivopts_data *data,
1882 tree base, tree step, bool important, enum iv_position pos,
1883 struct iv_use *use, tree incremented_at)
1885 unsigned i;
1886 struct iv_cand *cand = NULL;
1887 tree type, orig_type;
1889 if (base)
1891 orig_type = TREE_TYPE (base);
1892 type = generic_type_for (orig_type);
1893 if (type != orig_type)
1895 base = fold_convert (type, base);
1896 if (step)
1897 step = fold_convert (type, step);
1901 for (i = 0; i < n_iv_cands (data); i++)
1903 cand = iv_cand (data, i);
1905 if (cand->pos != pos)
1906 continue;
1908 if (cand->incremented_at != incremented_at)
1909 continue;
1911 if (!cand->iv)
1913 if (!base && !step)
1914 break;
1916 continue;
1919 if (!base && !step)
1920 continue;
1922 if (!operand_equal_p (base, cand->iv->base, 0))
1923 continue;
1925 if (zero_p (cand->iv->step))
1927 if (zero_p (step))
1928 break;
1930 else
1932 if (step && operand_equal_p (step, cand->iv->step, 0))
1933 break;
1937 if (i == n_iv_cands (data))
1939 cand = xcalloc (1, sizeof (struct iv_cand));
1940 cand->id = i;
1942 if (!base && !step)
1943 cand->iv = NULL;
1944 else
1945 cand->iv = alloc_iv (base, step);
1947 cand->pos = pos;
1948 if (pos != IP_ORIGINAL && cand->iv)
1950 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1951 cand->var_after = cand->var_before;
1953 cand->important = important;
1954 cand->incremented_at = incremented_at;
1955 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
1957 if (dump_file && (dump_flags & TDF_DETAILS))
1958 dump_cand (dump_file, cand);
1961 if (important && !cand->important)
1963 cand->important = true;
1964 if (dump_file && (dump_flags & TDF_DETAILS))
1965 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1968 if (use)
1970 bitmap_set_bit (use->related_cands, i);
1971 if (dump_file && (dump_flags & TDF_DETAILS))
1972 fprintf (dump_file, "Candidate %d is related to use %d\n",
1973 cand->id, use->id);
1976 return cand;
1979 /* Returns true if incrementing the induction variable at the end of the LOOP
1980 is allowed.
1982 The purpose is to avoid splitting the latch edge with a biv increment, thus
1983 creating a jump, possibly confusing other optimization passes and leaving
1984 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
1985 is not available (so we do not have a better alternative), or if the latch
1986 edge is already nonempty. */
1988 static bool
1989 allow_ip_end_pos_p (struct loop *loop)
1991 if (!ip_normal_pos (loop))
1992 return true;
1994 if (!empty_block_p (ip_end_pos (loop)))
1995 return true;
1997 return false;
2000 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2001 position to POS. If USE is not NULL, the candidate is set as related to
2002 it. The candidate computation is scheduled on all available positions. */
2004 static void
2005 add_candidate (struct ivopts_data *data,
2006 tree base, tree step, bool important, struct iv_use *use)
2008 if (ip_normal_pos (data->current_loop))
2009 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2010 if (ip_end_pos (data->current_loop)
2011 && allow_ip_end_pos_p (data->current_loop))
2012 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2015 /* Add a standard "0 + 1 * iteration" iv candidate for a
2016 type with SIZE bits. */
2018 static void
2019 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2020 unsigned int size)
2022 tree type = lang_hooks.types.type_for_size (size, true);
2023 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2024 true, NULL);
2027 /* Adds standard iv candidates. */
2029 static void
2030 add_standard_iv_candidates (struct ivopts_data *data)
2032 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2034 /* The same for a double-integer type if it is still fast enough. */
2035 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2036 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2040 /* Adds candidates based on the old induction variable IV. */
2042 static void
2043 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2045 tree phi, def;
2046 struct iv_cand *cand;
2048 add_candidate (data, iv->base, iv->step, true, NULL);
2050 /* The same, but with initial value zero. */
2051 add_candidate (data,
2052 build_int_cst (TREE_TYPE (iv->base), 0),
2053 iv->step, true, NULL);
2055 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2056 if (TREE_CODE (phi) == PHI_NODE)
2058 /* Additionally record the possibility of leaving the original iv
2059 untouched. */
2060 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2061 cand = add_candidate_1 (data,
2062 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2063 SSA_NAME_DEF_STMT (def));
2064 cand->var_before = iv->ssa_name;
2065 cand->var_after = def;
2069 /* Adds candidates based on the old induction variables. */
2071 static void
2072 add_old_ivs_candidates (struct ivopts_data *data)
2074 unsigned i;
2075 struct iv *iv;
2076 bitmap_iterator bi;
2078 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2080 iv = ver_info (data, i)->iv;
2081 if (iv && iv->biv_p && !zero_p (iv->step))
2082 add_old_iv_candidates (data, iv);
2086 /* Adds candidates based on the value of the induction variable IV and USE. */
2088 static void
2089 add_iv_value_candidates (struct ivopts_data *data,
2090 struct iv *iv, struct iv_use *use)
2092 add_candidate (data, iv->base, iv->step, false, use);
2094 /* The same, but with initial value zero. */
2095 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2096 iv->step, false, use);
2099 /* Adds candidates based on the address IV and USE. */
2101 static void
2102 add_address_candidates (struct ivopts_data *data,
2103 struct iv *iv, struct iv_use *use)
2105 tree base, abase;
2106 unsigned HOST_WIDE_INT offset;
2108 /* First, the trivial choices. */
2109 add_iv_value_candidates (data, iv, use);
2111 /* Second, try removing the COMPONENT_REFs. */
2112 if (TREE_CODE (iv->base) == ADDR_EXPR)
2114 base = TREE_OPERAND (iv->base, 0);
2115 while (TREE_CODE (base) == COMPONENT_REF
2116 || (TREE_CODE (base) == ARRAY_REF
2117 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
2118 base = TREE_OPERAND (base, 0);
2120 if (base != TREE_OPERAND (iv->base, 0))
2122 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
2123 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
2125 if (TREE_CODE (base) == INDIRECT_REF)
2126 base = TREE_OPERAND (base, 0);
2127 else
2128 base = build_addr (base);
2129 add_candidate (data, base, iv->step, false, use);
2133 /* Third, try removing the constant offset. */
2134 abase = iv->base;
2135 base = strip_offset (abase, false, &offset);
2136 if (offset)
2137 add_candidate (data, base, iv->step, false, use);
2140 /* Possibly adds pseudocandidate for replacing the final value of USE by
2141 a direct computation. */
2143 static void
2144 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2146 struct tree_niter_desc *niter;
2148 /* We must know where we exit the loop and how many times it rolls. */
2149 niter = niter_for_single_dom_exit (data);
2150 if (!niter
2151 || !zero_p (niter->may_be_zero))
2152 return;
2154 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2157 /* Adds candidates based on the uses. */
2159 static void
2160 add_derived_ivs_candidates (struct ivopts_data *data)
2162 unsigned i;
2164 for (i = 0; i < n_iv_uses (data); i++)
2166 struct iv_use *use = iv_use (data, i);
2168 if (!use)
2169 continue;
2171 switch (use->type)
2173 case USE_NONLINEAR_EXPR:
2174 case USE_COMPARE:
2175 /* Just add the ivs based on the value of the iv used here. */
2176 add_iv_value_candidates (data, use->iv, use);
2177 break;
2179 case USE_OUTER:
2180 add_iv_value_candidates (data, use->iv, use);
2182 /* Additionally, add the pseudocandidate for the possibility to
2183 replace the final value by a direct computation. */
2184 add_iv_outer_candidates (data, use);
2185 break;
2187 case USE_ADDRESS:
2188 add_address_candidates (data, use->iv, use);
2189 break;
2191 default:
2192 gcc_unreachable ();
2197 /* Record important candidates and add them to related_cands bitmaps
2198 if needed. */
2200 static void
2201 record_important_candidates (struct ivopts_data *data)
2203 unsigned i;
2204 struct iv_use *use;
2206 for (i = 0; i < n_iv_cands (data); i++)
2208 struct iv_cand *cand = iv_cand (data, i);
2210 if (cand->important)
2211 bitmap_set_bit (data->important_candidates, i);
2214 data->consider_all_candidates = (n_iv_cands (data)
2215 <= CONSIDER_ALL_CANDIDATES_BOUND);
2217 if (data->consider_all_candidates)
2219 /* We will not need "related_cands" bitmaps in this case,
2220 so release them to decrease peak memory consumption. */
2221 for (i = 0; i < n_iv_uses (data); i++)
2223 use = iv_use (data, i);
2224 BITMAP_FREE (use->related_cands);
2227 else
2229 /* Add important candidates to the related_cands bitmaps. */
2230 for (i = 0; i < n_iv_uses (data); i++)
2231 bitmap_ior_into (iv_use (data, i)->related_cands,
2232 data->important_candidates);
2236 /* Finds the candidates for the induction variables. */
2238 static void
2239 find_iv_candidates (struct ivopts_data *data)
2241 /* Add commonly used ivs. */
2242 add_standard_iv_candidates (data);
2244 /* Add old induction variables. */
2245 add_old_ivs_candidates (data);
2247 /* Add induction variables derived from uses. */
2248 add_derived_ivs_candidates (data);
2250 /* Record the important candidates. */
2251 record_important_candidates (data);
2254 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2255 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2256 we allocate a simple list to every use. */
2258 static void
2259 alloc_use_cost_map (struct ivopts_data *data)
2261 unsigned i, size, s, j;
2263 for (i = 0; i < n_iv_uses (data); i++)
2265 struct iv_use *use = iv_use (data, i);
2266 bitmap_iterator bi;
2268 if (data->consider_all_candidates)
2269 size = n_iv_cands (data);
2270 else
2272 s = 0;
2273 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2275 s++;
2278 /* Round up to the nearest power of two, so that computing the modulo by it is fast. */
2279 for (size = 1; size < s; size <<= 1)
2280 continue;
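/* Illustrative example (made-up numbers): a use with 5 related candidates
   gets size == 8 here, so its cost map has 8 slots and the
   "cand->id & (size - 1)" computation in set_use_iv_cost below is a cheap
   substitute for "cand->id % size".  */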
2283 use->n_map_members = size;
2284 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2288 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it depends
2289 on the invariants DEPENDS_ON and that the value used in expressing it
2290 is VALUE. */
2292 static void
2293 set_use_iv_cost (struct ivopts_data *data,
2294 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2295 bitmap depends_on, tree value)
2297 unsigned i, s;
2299 if (cost == INFTY)
2301 BITMAP_FREE (depends_on);
2302 return;
2305 if (data->consider_all_candidates)
2307 use->cost_map[cand->id].cand = cand;
2308 use->cost_map[cand->id].cost = cost;
2309 use->cost_map[cand->id].depends_on = depends_on;
2310 use->cost_map[cand->id].value = value;
2311 return;
2314 /* n_map_members is a power of two, so this computes modulo. */
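/* For illustration (hypothetical values): with n_map_members == 8 and
   cand->id == 13, the probe starts at slot 13 & 7 == 5, scans slots 5, 6
   and 7, and then wraps around to slots 0 .. 4 until a free slot is
   found.  */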
2315 s = cand->id & (use->n_map_members - 1);
2316 for (i = s; i < use->n_map_members; i++)
2317 if (!use->cost_map[i].cand)
2318 goto found;
2319 for (i = 0; i < s; i++)
2320 if (!use->cost_map[i].cand)
2321 goto found;
2323 gcc_unreachable ();
2325 found:
2326 use->cost_map[i].cand = cand;
2327 use->cost_map[i].cost = cost;
2328 use->cost_map[i].depends_on = depends_on;
2329 use->cost_map[i].value = value;
2332 /* Gets cost of (USE, CANDIDATE) pair. */
2334 static struct cost_pair *
2335 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2336 struct iv_cand *cand)
2338 unsigned i, s;
2339 struct cost_pair *ret;
2341 if (!cand)
2342 return NULL;
2344 if (data->consider_all_candidates)
2346 ret = use->cost_map + cand->id;
2347 if (!ret->cand)
2348 return NULL;
2350 return ret;
2353 /* n_map_members is a power of two, so this computes modulo. */
2354 s = cand->id & (use->n_map_members - 1);
2355 for (i = s; i < use->n_map_members; i++)
2356 if (use->cost_map[i].cand == cand)
2357 return use->cost_map + i;
2359 for (i = 0; i < s; i++)
2360 if (use->cost_map[i].cand == cand)
2361 return use->cost_map + i;
2363 return NULL;
2366 /* Returns an estimate of the cost of computing SEQ. */
2368 static unsigned
2369 seq_cost (rtx seq)
2371 unsigned cost = 0;
2372 rtx set;
2374 for (; seq; seq = NEXT_INSN (seq))
2376 set = single_set (seq);
2377 if (set)
2378 cost += rtx_cost (set, SET);
2379 else
2380 cost++;
2383 return cost;
2386 /* Produces DECL_RTL for object OBJ so that it looks like it is stored in memory. */
2387 static rtx
2388 produce_memory_decl_rtl (tree obj, int *regno)
2390 rtx x;
2392 gcc_assert (obj);
2393 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2395 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2396 x = gen_rtx_SYMBOL_REF (Pmode, name);
2398 else
2399 x = gen_raw_REG (Pmode, (*regno)++);
2401 return gen_rtx_MEM (DECL_MODE (obj), x);
2404 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
2405 walk_tree. DATA contains the actual fake register number. */
2407 static tree
2408 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2410 tree obj = NULL_TREE;
2411 rtx x = NULL_RTX;
2412 int *regno = data;
2414 switch (TREE_CODE (*expr_p))
2416 case ADDR_EXPR:
2417 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2418 handled_component_p (*expr_p);
2419 expr_p = &TREE_OPERAND (*expr_p, 0))
2420 continue;
2421 obj = *expr_p;
2422 if (DECL_P (obj))
2423 x = produce_memory_decl_rtl (obj, regno);
2424 break;
2426 case SSA_NAME:
2427 *ws = 0;
2428 obj = SSA_NAME_VAR (*expr_p);
2429 if (!DECL_RTL_SET_P (obj))
2430 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2431 break;
2433 case VAR_DECL:
2434 case PARM_DECL:
2435 case RESULT_DECL:
2436 *ws = 0;
2437 obj = *expr_p;
2439 if (DECL_RTL_SET_P (obj))
2440 break;
2442 if (DECL_MODE (obj) == BLKmode)
2443 x = produce_memory_decl_rtl (obj, regno);
2444 else
2445 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2447 break;
2449 default:
2450 break;
2453 if (x)
2455 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2456 SET_DECL_RTL (obj, x);
2459 return NULL_TREE;
2462 /* Determines cost of the computation of EXPR. */
2464 static unsigned
2465 computation_cost (tree expr)
2467 rtx seq, rslt;
2468 tree type = TREE_TYPE (expr);
2469 unsigned cost;
2470 /* Avoid using hard regs in ways which may be unsupported. */
2471 int regno = LAST_VIRTUAL_REGISTER + 1;
2473 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2474 start_sequence ();
2475 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2476 seq = get_insns ();
2477 end_sequence ();
2479 cost = seq_cost (seq);
2480 if (MEM_P (rslt))
2481 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2483 return cost;
2486 /* Returns the variable containing the value of candidate CAND at statement STMT. */
2488 static tree
2489 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2491 if (stmt_after_increment (loop, cand, stmt))
2492 return cand->var_after;
2493 else
2494 return cand->var_before;
2497 /* Determines the expression by which USE is expressed from induction variable
2498 CAND at statement AT in LOOP. */
2500 static tree
2501 get_computation_at (struct loop *loop,
2502 struct iv_use *use, struct iv_cand *cand, tree at)
2504 tree ubase = use->iv->base;
2505 tree ustep = use->iv->step;
2506 tree cbase = cand->iv->base;
2507 tree cstep = cand->iv->step;
2508 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2509 tree uutype;
2510 tree expr, delta;
2511 tree ratio;
2512 unsigned HOST_WIDE_INT ustepi, cstepi;
2513 HOST_WIDE_INT ratioi;
2515 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2517 /* We do not have enough precision to express the values of the use. */
2518 return NULL_TREE;
2521 expr = var_at_stmt (loop, cand, at);
2523 if (TREE_TYPE (expr) != ctype)
2525 /* This may happen with the original ivs. */
2526 expr = fold_convert (ctype, expr);
2529 if (TYPE_UNSIGNED (utype))
2530 uutype = utype;
2531 else
2533 uutype = unsigned_type_for (utype);
2534 ubase = fold_convert (uutype, ubase);
2535 ustep = fold_convert (uutype, ustep);
2538 if (uutype != ctype)
2540 expr = fold_convert (uutype, expr);
2541 cbase = fold_convert (uutype, cbase);
2542 cstep = fold_convert (uutype, cstep);
2545 if (!cst_and_fits_in_hwi (cstep)
2546 || !cst_and_fits_in_hwi (ustep))
2547 return NULL_TREE;
2549 ustepi = int_cst_value (ustep);
2550 cstepi = int_cst_value (cstep);
2552 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2554 /* TODO maybe consider case when ustep divides cstep and the ratio is
2555 a power of 2 (so that the division is fast to execute)? We would
2556 need to be much more careful with overflows etc. then. */
2557 return NULL_TREE;
2560 /* We may need to shift the value if we are after the increment. */
2561 if (stmt_after_increment (loop, cand, at))
2562 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2564 /* use = ubase - ratio * cbase + ratio * var.
2566 In the general case ubase + ratio * (var - cbase) could be better (one less
2567 multiplication), but it is often possible to eliminate redundant parts
2568 of the computation from the (ubase - ratio * cbase) term, and if that does not
2569 happen, fold is able to apply the distributive law to obtain this form
2570 anyway. */
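/* A small made-up example: for a use with ubase == 0, ustep == 4 and a
   candidate with cbase == 0, cstep == 1, divide gives ratioi == 4 and the
   use becomes

     use = 0 - 4 * 0 + 4 * var  ==  4 * var.

   The numbers are only illustrative; the real bases and steps are whatever
   scalar evolution analysis recorded for the two ivs.  */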
2572 if (ratioi == 1)
2574 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2575 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2577 else if (ratioi == -1)
2579 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2580 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2582 else
2584 ratio = build_int_cst_type (uutype, ratioi);
2585 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2586 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2587 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2588 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2591 return fold_convert (utype, expr);
2594 /* Determines the expression by which USE is expressed from induction variable
2595 CAND in LOOP. */
2597 static tree
2598 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2600 return get_computation_at (loop, use, cand, use->stmt);
2603 /* Returns cost of addition in MODE. */
2605 static unsigned
2606 add_cost (enum machine_mode mode)
2608 static unsigned costs[NUM_MACHINE_MODES];
2609 rtx seq;
2610 unsigned cost;
2612 if (costs[mode])
2613 return costs[mode];
2615 start_sequence ();
2616 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2617 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2618 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2619 NULL_RTX);
2620 seq = get_insns ();
2621 end_sequence ();
2623 cost = seq_cost (seq);
2624 if (!cost)
2625 cost = 1;
2627 costs[mode] = cost;
2629 if (dump_file && (dump_flags & TDF_DETAILS))
2630 fprintf (dump_file, "Addition in %s costs %d\n",
2631 GET_MODE_NAME (mode), cost);
2632 return cost;
2635 /* Entry in a hashtable of already known costs for multiplication. */
2636 struct mbc_entry
2638 HOST_WIDE_INT cst; /* The constant to multiply by. */
2639 enum machine_mode mode; /* In mode. */
2640 unsigned cost; /* The cost. */
2643 /* Computes the hash value for ENTRY. */
2645 static hashval_t
2646 mbc_entry_hash (const void *entry)
2648 const struct mbc_entry *e = entry;
2650 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2653 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2655 static int
2656 mbc_entry_eq (const void *entry1, const void *entry2)
2658 const struct mbc_entry *e1 = entry1;
2659 const struct mbc_entry *e2 = entry2;
2661 return (e1->mode == e2->mode
2662 && e1->cst == e2->cst);
2665 /* Returns cost of multiplication by constant CST in MODE. */
2667 static unsigned
2668 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2670 static htab_t costs;
2671 struct mbc_entry **cached, act;
2672 rtx seq;
2673 unsigned cost;
2675 if (!costs)
2676 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2678 act.mode = mode;
2679 act.cst = cst;
2680 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2681 if (*cached)
2682 return (*cached)->cost;
2684 *cached = xmalloc (sizeof (struct mbc_entry));
2685 (*cached)->mode = mode;
2686 (*cached)->cst = cst;
2688 start_sequence ();
2689 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2690 NULL_RTX, 0);
2691 seq = get_insns ();
2692 end_sequence ();
2694 cost = seq_cost (seq);
2696 if (dump_file && (dump_flags & TDF_DETAILS))
2697 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2698 (int) cst, GET_MODE_NAME (mode), cost);
2700 (*cached)->cost = cost;
2702 return cost;
2705 /* Returns the cost of an address in the shape symbol + var + OFFSET + RATIO * index.
2706 If SYMBOL_PRESENT is false, the symbol is omitted. If VAR_PRESENT is false,
2707 the variable is omitted. The created memory access is in Pmode.
2709 TODO -- there must be some better way. This all is quite crude. */
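/* An illustrative, target-dependent example: on a machine with
   base + index * scale + displacement addressing, something like

     sym + reg1 + 16 + 4 * reg2

   may be a single legitimate address and therefore cheap; on a target
   without scaled indexing, the RATIO part is charged via multiply_by_cost
   below, and an offset outside the legal range costs an extra addition.  */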
2711 static unsigned
2712 get_address_cost (bool symbol_present, bool var_present,
2713 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2715 #define MAX_RATIO 128
2716 static sbitmap valid_mult;
2717 static HOST_WIDE_INT rat, off;
2718 static HOST_WIDE_INT min_offset, max_offset;
2719 static unsigned costs[2][2][2][2];
2720 unsigned cost, acost;
2721 rtx seq, addr, base;
2722 bool offset_p, ratio_p;
2723 rtx reg1;
2724 HOST_WIDE_INT s_offset;
2725 unsigned HOST_WIDE_INT mask;
2726 unsigned bits;
2728 if (!valid_mult)
2730 HOST_WIDE_INT i;
2732 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2734 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2735 for (i = 1; i <= 1 << 20; i <<= 1)
2737 XEXP (addr, 1) = GEN_INT (i);
2738 if (!memory_address_p (Pmode, addr))
2739 break;
2741 max_offset = i >> 1;
2742 off = max_offset;
2744 for (i = 1; i <= 1 << 20; i <<= 1)
2746 XEXP (addr, 1) = GEN_INT (-i);
2747 if (!memory_address_p (Pmode, addr))
2748 break;
2750 min_offset = -(i >> 1);
2752 if (dump_file && (dump_flags & TDF_DETAILS))
2754 fprintf (dump_file, "get_address_cost:\n");
2755 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2756 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2759 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2760 sbitmap_zero (valid_mult);
2761 rat = 1;
2762 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2763 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2765 XEXP (addr, 1) = GEN_INT (i);
2766 if (memory_address_p (Pmode, addr))
2768 SET_BIT (valid_mult, i + MAX_RATIO);
2769 rat = i;
2773 if (dump_file && (dump_flags & TDF_DETAILS))
2775 fprintf (dump_file, " allowed multipliers:");
2776 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2777 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2778 fprintf (dump_file, " %d", (int) i);
2779 fprintf (dump_file, "\n");
2780 fprintf (dump_file, "\n");
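/* Sign-extend OFFSET from the width of Pmode into S_OFFSET; e.g. with a
   hypothetical 32-bit Pmode, an OFFSET of 0xfffffff0 is treated as the
   signed displacement -16.  */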
2784 bits = GET_MODE_BITSIZE (Pmode);
2785 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2786 offset &= mask;
2787 if ((offset >> (bits - 1) & 1))
2788 offset |= ~mask;
2789 s_offset = offset;
2791 cost = 0;
2792 offset_p = (s_offset != 0
2793 && min_offset <= s_offset && s_offset <= max_offset);
2794 ratio_p = (ratio != 1
2795 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2796 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2798 if (ratio != 1 && !ratio_p)
2799 cost += multiply_by_cost (ratio, Pmode);
2801 if (s_offset && !offset_p && !symbol_present)
2803 cost += add_cost (Pmode);
2804 var_present = true;
2807 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2808 if (!acost)
2810 acost = 0;
2812 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2813 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2814 if (ratio_p)
2815 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2817 if (var_present)
2818 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2820 if (symbol_present)
2822 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2823 if (offset_p)
2824 base = gen_rtx_fmt_e (CONST, Pmode,
2825 gen_rtx_fmt_ee (PLUS, Pmode,
2826 base,
2827 GEN_INT (off)));
2829 else if (offset_p)
2830 base = GEN_INT (off);
2831 else
2832 base = NULL_RTX;
2834 if (base)
2835 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2837 start_sequence ();
2838 addr = memory_address (Pmode, addr);
2839 seq = get_insns ();
2840 end_sequence ();
2842 acost = seq_cost (seq);
2843 acost += address_cost (addr, Pmode);
2845 if (!acost)
2846 acost = 1;
2847 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2850 return cost + acost;
2853 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2854 the bitmap in which they should be stored. */
2856 static struct ivopts_data *fd_ivopts_data;
2857 static tree
2858 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2860 bitmap *depends_on = data;
2861 struct version_info *info;
2863 if (TREE_CODE (*expr_p) != SSA_NAME)
2864 return NULL_TREE;
2865 info = name_info (fd_ivopts_data, *expr_p);
2867 if (!info->inv_id || info->has_nonlin_use)
2868 return NULL_TREE;
2870 if (!*depends_on)
2871 *depends_on = BITMAP_ALLOC (NULL);
2872 bitmap_set_bit (*depends_on, info->inv_id);
2874 return NULL_TREE;
2877 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
2878 invariants the computation depends on. */
2880 static unsigned
2881 force_var_cost (struct ivopts_data *data,
2882 tree expr, bitmap *depends_on)
2884 static bool costs_initialized = false;
2885 static unsigned integer_cost;
2886 static unsigned symbol_cost;
2887 static unsigned address_cost;
2888 tree op0, op1;
2889 unsigned cost0, cost1, cost;
2890 enum machine_mode mode;
2892 if (!costs_initialized)
2894 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2895 rtx x = gen_rtx_MEM (DECL_MODE (var),
2896 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2897 tree addr;
2898 tree type = build_pointer_type (integer_type_node);
2900 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2901 2000));
2903 SET_DECL_RTL (var, x);
2904 TREE_STATIC (var) = 1;
2905 addr = build1 (ADDR_EXPR, type, var);
2906 symbol_cost = computation_cost (addr) + 1;
2908 address_cost
2909 = computation_cost (build2 (PLUS_EXPR, type,
2910 addr,
2911 build_int_cst_type (type, 2000))) + 1;
2912 if (dump_file && (dump_flags & TDF_DETAILS))
2914 fprintf (dump_file, "force_var_cost:\n");
2915 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2916 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2917 fprintf (dump_file, " address %d\n", (int) address_cost);
2918 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2919 fprintf (dump_file, "\n");
2922 costs_initialized = true;
2925 STRIP_NOPS (expr);
2927 if (depends_on)
2929 fd_ivopts_data = data;
2930 walk_tree (&expr, find_depends, depends_on, NULL);
2933 if (SSA_VAR_P (expr))
2934 return 0;
2936 if (TREE_INVARIANT (expr))
2938 if (TREE_CODE (expr) == INTEGER_CST)
2939 return integer_cost;
2941 if (TREE_CODE (expr) == ADDR_EXPR)
2943 tree obj = TREE_OPERAND (expr, 0);
2945 if (TREE_CODE (obj) == VAR_DECL
2946 || TREE_CODE (obj) == PARM_DECL
2947 || TREE_CODE (obj) == RESULT_DECL)
2948 return symbol_cost;
2951 return address_cost;
2954 switch (TREE_CODE (expr))
2956 case PLUS_EXPR:
2957 case MINUS_EXPR:
2958 case MULT_EXPR:
2959 op0 = TREE_OPERAND (expr, 0);
2960 op1 = TREE_OPERAND (expr, 1);
2961 STRIP_NOPS (op0);
2962 STRIP_NOPS (op1);
2964 if (is_gimple_val (op0))
2965 cost0 = 0;
2966 else
2967 cost0 = force_var_cost (data, op0, NULL);
2969 if (is_gimple_val (op1))
2970 cost1 = 0;
2971 else
2972 cost1 = force_var_cost (data, op1, NULL);
2974 break;
2976 default:
2977 /* Just an arbitrary value, FIXME. */
2978 return target_spill_cost;
2981 mode = TYPE_MODE (TREE_TYPE (expr));
2982 switch (TREE_CODE (expr))
2984 case PLUS_EXPR:
2985 case MINUS_EXPR:
2986 cost = add_cost (mode);
2987 break;
2989 case MULT_EXPR:
2990 if (cst_and_fits_in_hwi (op0))
2991 cost = multiply_by_cost (int_cst_value (op0), mode);
2992 else if (cst_and_fits_in_hwi (op1))
2993 cost = multiply_by_cost (int_cst_value (op1), mode);
2994 else
2995 return target_spill_cost;
2996 break;
2998 default:
2999 gcc_unreachable ();
3002 cost += cost0;
3003 cost += cost1;
3005 /* Bound the cost by target_spill_cost. The parts of complicated
3006 computations often are either loop invariant or at least can
3007 be shared between several iv uses, so letting this grow without
3008 limits would not give reasonable results. */
3009 return cost < target_spill_cost ? cost : target_spill_cost;
3012 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3013 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3014 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3015 invariants the computation depends on. */
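/* For example (illustrative): if ADDR is g.f, where g is a file-scope
   struct and field f sits at byte offset 8, get_inner_reference returns g
   as the core object, so *SYMBOL_PRESENT is set to true and 8 is added to
   *OFFSET at no extra cost.  */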
3017 static unsigned
3018 split_address_cost (struct ivopts_data *data,
3019 tree addr, bool *symbol_present, bool *var_present,
3020 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3022 tree core;
3023 HOST_WIDE_INT bitsize;
3024 HOST_WIDE_INT bitpos;
3025 tree toffset;
3026 enum machine_mode mode;
3027 int unsignedp, volatilep;
3029 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3030 &unsignedp, &volatilep, false);
3032 if (toffset != 0
3033 || bitpos % BITS_PER_UNIT != 0
3034 || TREE_CODE (core) != VAR_DECL)
3036 *symbol_present = false;
3037 *var_present = true;
3038 fd_ivopts_data = data;
3039 walk_tree (&addr, find_depends, depends_on, NULL);
3040 return target_spill_cost;
3043 *offset += bitpos / BITS_PER_UNIT;
3044 if (TREE_STATIC (core)
3045 || DECL_EXTERNAL (core))
3047 *symbol_present = true;
3048 *var_present = false;
3049 return 0;
3052 *symbol_present = false;
3053 *var_present = true;
3054 return 0;
3057 /* Estimates cost of expressing difference of addresses E1 - E2 as
3058 var + symbol + offset. The value of offset is added to OFFSET,
3059 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3060 part is missing. DEPENDS_ON is a set of the invariants the computation
3061 depends on. */
3063 static unsigned
3064 ptr_difference_cost (struct ivopts_data *data,
3065 tree e1, tree e2, bool *symbol_present, bool *var_present,
3066 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3068 HOST_WIDE_INT diff = 0;
3069 unsigned cost;
3071 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3073 if (ptr_difference_const (e1, e2, &diff))
3075 *offset += diff;
3076 *symbol_present = false;
3077 *var_present = false;
3078 return 0;
3081 if (e2 == integer_zero_node)
3082 return split_address_cost (data, TREE_OPERAND (e1, 0),
3083 symbol_present, var_present, offset, depends_on);
3085 *symbol_present = false;
3086 *var_present = true;
3088 cost = force_var_cost (data, e1, depends_on);
3089 cost += force_var_cost (data, e2, depends_on);
3090 cost += add_cost (Pmode);
3092 return cost;
3095 /* Estimates cost of expressing difference E1 - E2 as
3096 var + symbol + offset. The value of offset is added to OFFSET,
3097 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3098 part is missing. DEPENDS_ON is a set of the invariants the computation
3099 depends on. */
3101 static unsigned
3102 difference_cost (struct ivopts_data *data,
3103 tree e1, tree e2, bool *symbol_present, bool *var_present,
3104 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3106 unsigned cost;
3107 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3108 unsigned HOST_WIDE_INT off1, off2;
3110 e1 = strip_offset (e1, false, &off1);
3111 e2 = strip_offset (e2, false, &off2);
3112 *offset += off1 - off2;
3114 STRIP_NOPS (e1);
3115 STRIP_NOPS (e2);
3117 if (TREE_CODE (e1) == ADDR_EXPR)
3118 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3119 depends_on);
3120 *symbol_present = false;
3122 if (operand_equal_p (e1, e2, 0))
3124 *var_present = false;
3125 return 0;
3127 *var_present = true;
3128 if (zero_p (e2))
3129 return force_var_cost (data, e1, depends_on);
3131 if (zero_p (e1))
3133 cost = force_var_cost (data, e2, depends_on);
3134 cost += multiply_by_cost (-1, mode);
3136 return cost;
3139 cost = force_var_cost (data, e1, depends_on);
3140 cost += force_var_cost (data, e2, depends_on);
3141 cost += add_cost (mode);
3143 return cost;
3146 /* Determines the cost of the computation by which USE is expressed
3147 from induction variable CAND. If ADDRESS_P is true, we just need
3148 to create an address from it, otherwise we want to get it into
3149 a register. The set of invariants we depend on is stored in
3150 DEPENDS_ON. AT is the statement at which the value is computed. */
3152 static unsigned
3153 get_computation_cost_at (struct ivopts_data *data,
3154 struct iv_use *use, struct iv_cand *cand,
3155 bool address_p, bitmap *depends_on, tree at)
3157 tree ubase = use->iv->base, ustep = use->iv->step;
3158 tree cbase, cstep;
3159 tree utype = TREE_TYPE (ubase), ctype;
3160 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3161 HOST_WIDE_INT ratio, aratio;
3162 bool var_present, symbol_present;
3163 unsigned cost = 0, n_sums;
3165 *depends_on = NULL;
3167 /* Only consider real candidates. */
3168 if (!cand->iv)
3169 return INFTY;
3171 cbase = cand->iv->base;
3172 cstep = cand->iv->step;
3173 ctype = TREE_TYPE (cbase);
3175 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3177 /* We do not have enough precision to express the values of the use. */
3178 return INFTY;
3181 if (address_p)
3183 /* Do not try to express address of an object with computation based
3184 on address of a different object. This may cause problems in rtl
3185 level alias analysis (that does not expect this to be happening,
3186 as this is illegal in C), and would be unlikely to be useful
3187 anyway. */
3188 if (use->iv->base_object
3189 && cand->iv->base_object
3190 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3191 return INFTY;
3194 if (!cst_and_fits_in_hwi (ustep)
3195 || !cst_and_fits_in_hwi (cstep))
3196 return INFTY;
3198 if (TREE_CODE (ubase) == INTEGER_CST
3199 && !cst_and_fits_in_hwi (ubase))
3200 goto fallback;
3202 if (TREE_CODE (cbase) == INTEGER_CST
3203 && !cst_and_fits_in_hwi (cbase))
3204 goto fallback;
3206 ustepi = int_cst_value (ustep);
3207 cstepi = int_cst_value (cstep);
3209 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3211 /* TODO -- add direct handling of this case. */
3212 goto fallback;
3215 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3216 return INFTY;
3218 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3219 or ratio == 1, it is better to handle this like
3221 ubase - ratio * cbase + ratio * var
3223 (this also holds in the case ratio == -1, TODO). */
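/* Hypothetical example: with ratio == 2 and cbase == 3, the code below
   starts from offset == -6 and costs ubase against zero, so the constant
   part of the candidate base is folded into the offset instead of
   requiring a separate subtraction at run time.  */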
3225 if (TREE_CODE (cbase) == INTEGER_CST)
3227 offset = - ratio * int_cst_value (cbase);
3228 cost += difference_cost (data,
3229 ubase, integer_zero_node,
3230 &symbol_present, &var_present, &offset,
3231 depends_on);
3233 else if (ratio == 1)
3235 cost += difference_cost (data,
3236 ubase, cbase,
3237 &symbol_present, &var_present, &offset,
3238 depends_on);
3240 else
3242 cost += force_var_cost (data, cbase, depends_on);
3243 cost += add_cost (TYPE_MODE (ctype));
3244 cost += difference_cost (data,
3245 ubase, integer_zero_node,
3246 &symbol_present, &var_present, &offset,
3247 depends_on);
3250 /* If we are after the increment, the value of the candidate is higher by
3251 one iteration. */
3252 if (stmt_after_increment (data->current_loop, cand, at))
3253 offset -= ratio * cstepi;
3255 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3256 (symbol/var/const parts may be omitted). If we are looking for an address,
3257 find the cost of addressing this. */
3258 if (address_p)
3259 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3261 /* Otherwise estimate the costs for computing the expression. */
3262 aratio = ratio > 0 ? ratio : -ratio;
3263 if (!symbol_present && !var_present && !offset)
3265 if (ratio != 1)
3266 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3268 return cost;
3271 if (aratio != 1)
3272 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3274 n_sums = 1;
3275 if (var_present
3276 /* Symbol + offset should be compile-time computable. */
3277 && (symbol_present || offset))
3278 n_sums++;
3280 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3282 fallback:
3284 /* Just get the expression, expand it and measure the cost. */
3285 tree comp = get_computation_at (data->current_loop, use, cand, at);
3287 if (!comp)
3288 return INFTY;
3290 if (address_p)
3291 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3293 return computation_cost (comp);
3297 /* Determines the cost of the computation by which USE is expressed
3298 from induction variable CAND. If ADDRESS_P is true, we just need
3299 to create an address from it, otherwise we want to get it into
3300 a register. The set of invariants we depend on is stored in
3301 DEPENDS_ON. */
3303 static unsigned
3304 get_computation_cost (struct ivopts_data *data,
3305 struct iv_use *use, struct iv_cand *cand,
3306 bool address_p, bitmap *depends_on)
3308 return get_computation_cost_at (data,
3309 use, cand, address_p, depends_on, use->stmt);
3312 /* Determines cost of basing replacement of USE on CAND in a generic
3313 expression. */
3315 static bool
3316 determine_use_iv_cost_generic (struct ivopts_data *data,
3317 struct iv_use *use, struct iv_cand *cand)
3319 bitmap depends_on;
3320 unsigned cost;
3322 /* The simple case first -- if we need to express value of the preserved
3323 original biv, the cost is 0. This also prevents us from counting the
3324 cost of increment twice -- once at this use and once in the cost of
3325 the candidate. */
3326 if (cand->pos == IP_ORIGINAL
3327 && cand->incremented_at == use->stmt)
3329 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3330 return true;
3333 cost = get_computation_cost (data, use, cand, false, &depends_on);
3334 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3336 return cost != INFTY;
3339 /* Determines cost of basing replacement of USE on CAND in an address. */
3341 static bool
3342 determine_use_iv_cost_address (struct ivopts_data *data,
3343 struct iv_use *use, struct iv_cand *cand)
3345 bitmap depends_on;
3346 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3348 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3350 return cost != INFTY;
3353 /* Computes value of induction variable IV in iteration NITER. */
3355 static tree
3356 iv_value (struct iv *iv, tree niter)
3358 tree val;
3359 tree type = TREE_TYPE (iv->base);
3361 niter = fold_convert (type, niter);
3362 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3364 return fold (build2 (PLUS_EXPR, type, iv->base, val));
3367 /* Computes value of candidate CAND at position AT in iteration NITER. */
3369 static tree
3370 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3372 tree val = iv_value (cand->iv, niter);
3373 tree type = TREE_TYPE (cand->iv->base);
3375 if (stmt_after_increment (loop, cand, at))
3376 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3378 return val;
3381 /* Returns the period of the induction variable IV. */
3383 static tree
3384 iv_period (struct iv *iv)
3386 tree step = iv->step, period, type;
3387 tree pow2div;
3389 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3391 /* Period of the iv is gcd (step, type range). Since type range is power
3392 of two, it suffices to determine the maximum power of two that divides
3393 step. */
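/* Worked example (illustrative only): a 32-bit unsigned iv with step 4 has
   num_ending_zeros == 2, so the period below is the mask of 32 - 2 = 30
   low bits, i.e. 2^30 - 1 -- the largest iteration count for which the iv
   still takes distinct values.  */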
3394 pow2div = num_ending_zeros (step);
3395 type = unsigned_type_for (TREE_TYPE (step));
3397 period = build_low_bits_mask (type,
3398 (TYPE_PRECISION (type)
3399 - tree_low_cst (pow2div, 1)));
3401 return period;
3404 /* Returns the comparison operator used when eliminating the iv USE. */
3406 static enum tree_code
3407 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3409 struct loop *loop = data->current_loop;
3410 basic_block ex_bb;
3411 edge exit;
3413 ex_bb = bb_for_stmt (use->stmt);
3414 exit = EDGE_SUCC (ex_bb, 0);
3415 if (flow_bb_inside_loop_p (loop, exit->dest))
3416 exit = EDGE_SUCC (ex_bb, 1);
3418 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3421 /* Checks whether it is possible to express the condition in USE by a comparison
3422 of candidate CAND. If so, stores the value compared against in *BOUND. */
3424 static bool
3425 may_eliminate_iv (struct ivopts_data *data,
3426 struct iv_use *use, struct iv_cand *cand, tree *bound)
3428 basic_block ex_bb;
3429 edge exit;
3430 struct tree_niter_desc *niter;
3431 tree nit, nit_type;
3432 tree wider_type, period, per_type;
3433 struct loop *loop = data->current_loop;
3435 /* For now this works only for exits that dominate the loop latch. TODO -- extend
3436 it to other conditions inside the loop body. */
3437 ex_bb = bb_for_stmt (use->stmt);
3438 if (use->stmt != last_stmt (ex_bb)
3439 || TREE_CODE (use->stmt) != COND_EXPR)
3440 return false;
3441 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3442 return false;
3444 exit = EDGE_SUCC (ex_bb, 0);
3445 if (flow_bb_inside_loop_p (loop, exit->dest))
3446 exit = EDGE_SUCC (ex_bb, 1);
3447 if (flow_bb_inside_loop_p (loop, exit->dest))
3448 return false;
3450 niter = niter_for_exit (data, exit);
3451 if (!niter
3452 || !zero_p (niter->may_be_zero))
3453 return false;
3455 nit = niter->niter;
3456 nit_type = TREE_TYPE (nit);
3458 /* Determine whether we may use the variable to test whether niter iterations
3459 have elapsed. This is the case iff the period of the induction variable is
3460 at least the number of iterations. */
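/* Illustrative example (made-up loop): if the exit is taken after 100
   iterations and the candidate is a 32-bit iv with step 1, its period is
   2^32 - 1 >= 100, so the exit condition may be rewritten as a comparison
   of the candidate with the value computed by cand_value_at below.  */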
3461 period = iv_period (cand->iv);
3462 if (!period)
3463 return false;
3464 per_type = TREE_TYPE (period);
3466 wider_type = TREE_TYPE (period);
3467 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3468 wider_type = per_type;
3469 else
3470 wider_type = nit_type;
3472 if (!integer_nonzerop (fold (build2 (GE_EXPR, boolean_type_node,
3473 fold_convert (wider_type, period),
3474 fold_convert (wider_type, nit)))))
3475 return false;
3477 *bound = cand_value_at (loop, cand, use->stmt, nit);
3478 return true;
3481 /* Determines cost of basing replacement of USE on CAND in a condition. */
3483 static bool
3484 determine_use_iv_cost_condition (struct ivopts_data *data,
3485 struct iv_use *use, struct iv_cand *cand)
3487 tree bound = NULL_TREE, op, cond;
3488 bitmap depends_on = NULL;
3489 unsigned cost;
3491 /* Only consider real candidates. */
3492 if (!cand->iv)
3494 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
3495 return false;
3498 if (may_eliminate_iv (data, use, cand, &bound))
3500 cost = force_var_cost (data, bound, &depends_on);
3502 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3503 return cost != INFTY;
3506 /* The induction variable elimination failed; just express the original
3507 giv. If it is compared with an invariant, note that we cannot get
3508 rid of it. */
3509 cost = get_computation_cost (data, use, cand, false, &depends_on);
3511 cond = *use->op_p;
3512 if (TREE_CODE (cond) != SSA_NAME)
3514 op = TREE_OPERAND (cond, 0);
3515 if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
3516 op = TREE_OPERAND (cond, 1);
3517 if (TREE_CODE (op) == SSA_NAME)
3519 op = get_iv (data, op)->base;
3520 fd_ivopts_data = data;
3521 walk_tree (&op, find_depends, &depends_on, NULL);
3525 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
3526 return cost != INFTY;
3529 /* Checks whether it is possible to replace the final value of USE by
3530 a direct computation. If so, the formula is stored in *VALUE. */
3532 static bool
3533 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
3534 tree *value)
3536 struct loop *loop = data->current_loop;
3537 edge exit;
3538 struct tree_niter_desc *niter;
3540 exit = single_dom_exit (loop);
3541 if (!exit)
3542 return false;
3544 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3545 bb_for_stmt (use->stmt)));
3547 niter = niter_for_single_dom_exit (data);
3548 if (!niter
3549 || !zero_p (niter->may_be_zero))
3550 return false;
3552 *value = iv_value (use->iv, niter->niter);
3554 return true;
3557 /* Determines cost of replacing final value of USE using CAND. */
3559 static bool
3560 determine_use_iv_cost_outer (struct ivopts_data *data,
3561 struct iv_use *use, struct iv_cand *cand)
3563 bitmap depends_on;
3564 unsigned cost;
3565 edge exit;
3566 tree value = NULL_TREE;
3567 struct loop *loop = data->current_loop;
3569 /* The simple case first -- if we need to express value of the preserved
3570 original biv, the cost is 0. This also prevents us from counting the
3571 cost of increment twice -- once at this use and once in the cost of
3572 the candidate. */
3573 if (cand->pos == IP_ORIGINAL
3574 && cand->incremented_at == use->stmt)
3576 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3577 return true;
3580 if (!cand->iv)
3582 if (!may_replace_final_value (data, use, &value))
3584 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
3585 return false;
3588 depends_on = NULL;
3589 cost = force_var_cost (data, value, &depends_on);
3591 cost /= AVG_LOOP_NITER (loop);
3593 set_use_iv_cost (data, use, cand, cost, depends_on, value);
3594 return cost != INFTY;
3597 exit = single_dom_exit (loop);
3598 if (exit)
3600 /* If there is just a single exit, we may use the value of the candidate
3601 after the exit is taken to determine the value of the use. */
3602 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3603 last_stmt (exit->src));
3604 if (cost != INFTY)
3605 cost /= AVG_LOOP_NITER (loop);
3607 else
3609 /* Otherwise we just need to compute the iv. */
3610 cost = get_computation_cost (data, use, cand, false, &depends_on);
3613 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3615 return cost != INFTY;
3618 /* Determines cost of basing replacement of USE on CAND. Returns false
3619 if USE cannot be based on CAND. */
3621 static bool
3622 determine_use_iv_cost (struct ivopts_data *data,
3623 struct iv_use *use, struct iv_cand *cand)
3625 switch (use->type)
3627 case USE_NONLINEAR_EXPR:
3628 return determine_use_iv_cost_generic (data, use, cand);
3630 case USE_OUTER:
3631 return determine_use_iv_cost_outer (data, use, cand);
3633 case USE_ADDRESS:
3634 return determine_use_iv_cost_address (data, use, cand);
3636 case USE_COMPARE:
3637 return determine_use_iv_cost_condition (data, use, cand);
3639 default:
3640 gcc_unreachable ();
3644 /* Determines costs of basing the use of the iv on an iv candidate. */
3646 static void
3647 determine_use_iv_costs (struct ivopts_data *data)
3649 unsigned i, j;
3650 struct iv_use *use;
3651 struct iv_cand *cand;
3652 bitmap to_clear = BITMAP_ALLOC (NULL);
3654 alloc_use_cost_map (data);
3656 for (i = 0; i < n_iv_uses (data); i++)
3658 use = iv_use (data, i);
3660 if (data->consider_all_candidates)
3662 for (j = 0; j < n_iv_cands (data); j++)
3664 cand = iv_cand (data, j);
3665 determine_use_iv_cost (data, use, cand);
3668 else
3670 bitmap_iterator bi;
3672 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3674 cand = iv_cand (data, j);
3675 if (!determine_use_iv_cost (data, use, cand))
3676 bitmap_set_bit (to_clear, j);
3679 /* Remove the candidates for which the cost is infinite from
3680 the list of related candidates. */
3681 bitmap_and_compl_into (use->related_cands, to_clear);
3682 bitmap_clear (to_clear);
3686 BITMAP_FREE (to_clear);
3688 if (dump_file && (dump_flags & TDF_DETAILS))
3690 fprintf (dump_file, "Use-candidate costs:\n");
3692 for (i = 0; i < n_iv_uses (data); i++)
3694 use = iv_use (data, i);
3696 fprintf (dump_file, "Use %d:\n", i);
3697 fprintf (dump_file, " cand\tcost\tdepends on\n");
3698 for (j = 0; j < use->n_map_members; j++)
3700 if (!use->cost_map[j].cand
3701 || use->cost_map[j].cost == INFTY)
3702 continue;
3704 fprintf (dump_file, " %d\t%d\t",
3705 use->cost_map[j].cand->id,
3706 use->cost_map[j].cost);
3707 if (use->cost_map[j].depends_on)
3708 bitmap_print (dump_file,
3709 use->cost_map[j].depends_on, "","");
3710 fprintf (dump_file, "\n");
3713 fprintf (dump_file, "\n");
3715 fprintf (dump_file, "\n");
3719 /* Determines cost of the candidate CAND. */
3721 static void
3722 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3724 unsigned cost_base, cost_step;
3725 tree base;
3727 if (!cand->iv)
3729 cand->cost = 0;
3730 return;
3733 /* There are two costs associated with the candidate -- its increment
3734 and its initialization. The second is almost negligible for any loop
3735 that rolls enough, so we account for it only very little. */
3737 base = cand->iv->base;
3738 cost_base = force_var_cost (data, base, NULL);
3739 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3741 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3743 /* Prefer the original iv unless we may gain something by replacing it;
3744 this is not really relevant for artificial ivs created by other
3745 passes. */
3746 if (cand->pos == IP_ORIGINAL
3747 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3748 cand->cost--;
3750 /* Prefer not to insert statements into latch unless there are some
3751 already (so that we do not create unnecessary jumps). */
3752 if (cand->pos == IP_END
3753 && empty_block_p (ip_end_pos (data->current_loop)))
3754 cand->cost++;
3757 /* Determines costs of computation of the candidates. */
3759 static void
3760 determine_iv_costs (struct ivopts_data *data)
3762 unsigned i;
3764 if (dump_file && (dump_flags & TDF_DETAILS))
3766 fprintf (dump_file, "Candidate costs:\n");
3767 fprintf (dump_file, " cand\tcost\n");
3770 for (i = 0; i < n_iv_cands (data); i++)
3772 struct iv_cand *cand = iv_cand (data, i);
3774 determine_iv_cost (data, cand);
3776 if (dump_file && (dump_flags & TDF_DETAILS))
3777 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3780 if (dump_file && (dump_flags & TDF_DETAILS))
3781 fprintf (dump_file, "\n");
3784 /* Calculates cost for having SIZE induction variables. */
3786 static unsigned
3787 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3789 return global_cost_for_size (size,
3790 loop_data (data->current_loop)->regs_used,
3791 n_iv_uses (data));
3794 /* For each size of the induction variable set, determine the penalty. */
3796 static void
3797 determine_set_costs (struct ivopts_data *data)
3799 unsigned j, n;
3800 tree phi, op;
3801 struct loop *loop = data->current_loop;
3802 bitmap_iterator bi;
3804 /* We use the following model (definitely improvable, especially the
3805 cost function -- TODO):
3807 We estimate the number of registers available (using MD data), name it A.
3809 We estimate the number of registers used by the loop, name it U. This
3810 number is obtained as the number of loop phi nodes (not counting virtual
3811 registers and bivs) + the number of variables from outside of the loop.
3813 We set a reserve R (free regs that are used for temporary computations,
3814 etc.). For now the reserve is a constant 3.
3816 Let I be the number of induction variables.
3818 -- if U + I + R <= A, the cost is I * SMALL_COST (just so that we are not
3819 encouraged to make a lot of ivs without a reason).
3820 -- if A - R < U + I <= A, the cost is I * PRES_COST
3821 -- if U + I > A, the cost is I * PRES_COST and
3822 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
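/* A worked example with made-up numbers: with A = 10 available registers,
   R = 3 and U = 4 registers used by the loop, up to I = 3 ivs cost only
   I * target_small_cost; for I between 4 and 6 the cost is
   I * target_pres_cost; and for I = 8 (U + I = 12 > A) an extra
   (number of uses) * target_spill_cost * (12 - 10) / 12 is added on top
   of I * target_pres_cost.  */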
3824 if (dump_file && (dump_flags & TDF_DETAILS))
3826 fprintf (dump_file, "Global costs:\n");
3827 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3828 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3829 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3830 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3833 n = 0;
3834 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3836 op = PHI_RESULT (phi);
3838 if (!is_gimple_reg (op))
3839 continue;
3841 if (get_iv (data, op))
3842 continue;
3844 n++;
3847 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3849 struct version_info *info = ver_info (data, j);
3851 if (info->inv_id && info->has_nonlin_use)
3852 n++;
3855 loop_data (loop)->regs_used = n;
3856 if (dump_file && (dump_flags & TDF_DETAILS))
3857 fprintf (dump_file, " regs_used %d\n", n);
3859 if (dump_file && (dump_flags & TDF_DETAILS))
3861 fprintf (dump_file, " cost for size:\n");
3862 fprintf (dump_file, " ivs\tcost\n");
3863 for (j = 0; j <= 2 * target_avail_regs; j++)
3864 fprintf (dump_file, " %d\t%d\n", j,
3865 ivopts_global_cost_for_size (data, j));
3866 fprintf (dump_file, "\n");
3870 /* Returns true if A is a cheaper cost pair than B. */
3872 static bool
3873 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3875 if (!a)
3876 return false;
3878 if (!b)
3879 return true;
3881 if (a->cost < b->cost)
3882 return true;
3884 if (a->cost > b->cost)
3885 return false;
3887 /* In case the costs are the same, prefer the cheaper candidate. */
3888 if (a->cand->cost < b->cand->cost)
3889 return true;
3891 return false;
3894 /* Computes the cost field of IVS structure. */
3896 static void
3897 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
3899 unsigned cost = 0;
3901 cost += ivs->cand_use_cost;
3902 cost += ivs->cand_cost;
3903 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
3905 ivs->cost = cost;
3908 /* Set USE not to be expressed by any candidate in IVS. */
3910 static void
3911 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
3912 struct iv_use *use)
3914 unsigned uid = use->id, cid, iid;
3915 bitmap deps;
3916 struct cost_pair *cp;
3917 bitmap_iterator bi;
3919 cp = ivs->cand_for_use[uid];
3920 if (!cp)
3921 return;
3922 cid = cp->cand->id;
3924 ivs->bad_uses++;
3925 ivs->cand_for_use[uid] = NULL;
3926 ivs->n_cand_uses[cid]--;
3928 if (ivs->n_cand_uses[cid] == 0)
3930 bitmap_clear_bit (ivs->cands, cid);
3931 /* Do not count the pseudocandidates. */
3932 if (cp->cand->iv)
3933 ivs->n_regs--;
3934 ivs->n_cands--;
3935 ivs->cand_cost -= cp->cand->cost;
3938 ivs->cand_use_cost -= cp->cost;
3940 deps = cp->depends_on;
3942 if (deps)
3944 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3946 ivs->n_invariant_uses[iid]--;
3947 if (ivs->n_invariant_uses[iid] == 0)
3948 ivs->n_regs--;
3952 iv_ca_recount_cost (data, ivs);
3955 /* Set cost pair for USE in set IVS to CP. */
3957 static void
3958 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
3959 struct iv_use *use, struct cost_pair *cp)
3961 unsigned uid = use->id, cid, iid;
3962 bitmap deps;
3963 bitmap_iterator bi;
3965 if (ivs->cand_for_use[uid] == cp)
3966 return;
3968 if (ivs->cand_for_use[uid])
3969 iv_ca_set_no_cp (data, ivs, use);
3971 if (cp)
3973 cid = cp->cand->id;
3975 ivs->bad_uses--;
3976 ivs->cand_for_use[uid] = cp;
3977 ivs->n_cand_uses[cid]++;
3978 if (ivs->n_cand_uses[cid] == 1)
3980 bitmap_set_bit (ivs->cands, cid);
3981 /* Do not count the pseudocandidates. */
3982 if (cp->cand->iv)
3983 ivs->n_regs++;
3984 ivs->n_cands++;
3985 ivs->cand_cost += cp->cand->cost;
3988 ivs->cand_use_cost += cp->cost;
3990 deps = cp->depends_on;
3992 if (deps)
3994 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3996 ivs->n_invariant_uses[iid]++;
3997 if (ivs->n_invariant_uses[iid] == 1)
3998 ivs->n_regs++;
4002 iv_ca_recount_cost (data, ivs);
4006 /* Extend set IVS by expressing USE by some of the candidates in it
4007 if possible. */
4009 static void
4010 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4011 struct iv_use *use)
4013 struct cost_pair *best_cp = NULL, *cp;
4014 bitmap_iterator bi;
4015 unsigned i;
4017 gcc_assert (ivs->upto >= use->id);
4019 if (ivs->upto == use->id)
4021 ivs->upto++;
4022 ivs->bad_uses++;
4025 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4027 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4029 if (cheaper_cost_pair (cp, best_cp))
4030 best_cp = cp;
4033 iv_ca_set_cp (data, ivs, use, best_cp);
4036 /* Get cost for assignment IVS. */
4038 static unsigned
4039 iv_ca_cost (struct iv_ca *ivs)
4041 return (ivs->bad_uses ? INFTY : ivs->cost);
4044 /* Returns true if all dependences of CP are among invariants in IVS. */
4046 static bool
4047 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4049 unsigned i;
4050 bitmap_iterator bi;
4052 if (!cp->depends_on)
4053 return true;
4055 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4057 if (ivs->n_invariant_uses[i] == 0)
4058 return false;
4061 return true;
4064 /* Creates a change record for expressing USE by NEW_CP instead of OLD_CP and chains
4065 it before NEXT_CHANGE. */
4067 static struct iv_ca_delta *
4068 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4069 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4071 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4073 change->use = use;
4074 change->old_cp = old_cp;
4075 change->new_cp = new_cp;
4076 change->next_change = next_change;
4078 return change;
4081 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4082 are rewritten. */
4084 static struct iv_ca_delta *
4085 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4087 struct iv_ca_delta *last;
4089 if (!l2)
4090 return l1;
4092 if (!l1)
4093 return l2;
4095 for (last = l1; last->next_change; last = last->next_change)
4096 continue;
4097 last->next_change = l2;
4099 return l1;
4102 /* Returns the cost pair through which USE is expressed in IVS. */
4104 static struct cost_pair *
4105 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4107 return ivs->cand_for_use[use->id];
4110 /* Reverses the list of changes DELTA, forming its inverse. */
4112 static struct iv_ca_delta *
4113 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4115 struct iv_ca_delta *act, *next, *prev = NULL;
4116 struct cost_pair *tmp;
4118 for (act = delta; act; act = next)
4120 next = act->next_change;
4121 act->next_change = prev;
4122 prev = act;
4124 tmp = act->old_cp;
4125 act->old_cp = act->new_cp;
4126 act->new_cp = tmp;
4129 return prev;
4132 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4133 reverted instead. */
4135 static void
4136 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4137 struct iv_ca_delta *delta, bool forward)
4139 struct cost_pair *from, *to;
4140 struct iv_ca_delta *act;
4142 if (!forward)
4143 delta = iv_ca_delta_reverse (delta);
4145 for (act = delta; act; act = act->next_change)
4147 from = act->old_cp;
4148 to = act->new_cp;
4149 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4150 iv_ca_set_cp (data, ivs, act->use, to);
4153 if (!forward)
4154 iv_ca_delta_reverse (delta);
4157 /* Returns true if CAND is used in IVS. */
4159 static bool
4160 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4162 return ivs->n_cand_uses[cand->id] > 0;
4165 /* Returns number of induction variable candidates in the set IVS. */
4167 static unsigned
4168 iv_ca_n_cands (struct iv_ca *ivs)
4170 return ivs->n_cands;
4173 /* Free the list of changes DELTA. */
4175 static void
4176 iv_ca_delta_free (struct iv_ca_delta **delta)
4178 struct iv_ca_delta *act, *next;
4180 for (act = *delta; act; act = next)
4182 next = act->next_change;
4183 free (act);
4186 *delta = NULL;
4189 /* Allocates a new iv candidate assignment. */
4191 static struct iv_ca *
4192 iv_ca_new (struct ivopts_data *data)
4194 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4196 nw->upto = 0;
4197 nw->bad_uses = 0;
4198 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4199 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4200 nw->cands = BITMAP_ALLOC (NULL);
4201 nw->n_cands = 0;
4202 nw->n_regs = 0;
4203 nw->cand_use_cost = 0;
4204 nw->cand_cost = 0;
4205 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4206 nw->cost = 0;
4208 return nw;
4211 /* Free memory occupied by the set IVS. */
4213 static void
4214 iv_ca_free (struct iv_ca **ivs)
4216 free ((*ivs)->cand_for_use);
4217 free ((*ivs)->n_cand_uses);
4218 BITMAP_FREE ((*ivs)->cands);
4219 free ((*ivs)->n_invariant_uses);
4220 free (*ivs);
4221 *ivs = NULL;
4224 /* Dumps IVS to FILE. */
4226 static void
4227 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4229 const char *pref = " invariants ";
4230 unsigned i;
4232 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4233 bitmap_print (file, ivs->cands, " candidates ","\n");
4235 for (i = 1; i <= data->max_inv_id; i++)
4236 if (ivs->n_invariant_uses[i])
4238 fprintf (file, "%s%d", pref, i);
4239 pref = ", ";
4241 fprintf (file, "\n");
4244 /* Try changing the candidate in IVS to CAND for each use. Return the cost of the
4245 new set, and store the differences in DELTA. The number of induction variables
4246 in the new set is stored in *N_IVS. */
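/* Note that this function only measures the effect of the change: the
   delta is committed temporarily so that the new cost can be read off,
   and then reverted, so IVS itself is left unchanged; the caller decides
   whether to commit DELTA for real.  */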
4248 static unsigned
4249 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4250 struct iv_cand *cand, struct iv_ca_delta **delta,
4251 unsigned *n_ivs)
4253 unsigned i, cost;
4254 struct iv_use *use;
4255 struct cost_pair *old_cp, *new_cp;
4257 *delta = NULL;
4258 for (i = 0; i < ivs->upto; i++)
4260 use = iv_use (data, i);
4261 old_cp = iv_ca_cand_for_use (ivs, use);
4263 if (old_cp
4264 && old_cp->cand == cand)
4265 continue;
4267 new_cp = get_use_iv_cost (data, use, cand);
4268 if (!new_cp)
4269 continue;
4271 if (!iv_ca_has_deps (ivs, new_cp))
4272 continue;
4274 if (!cheaper_cost_pair (new_cp, old_cp))
4275 continue;
4277 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4280 iv_ca_delta_commit (data, ivs, *delta, true);
4281 cost = iv_ca_cost (ivs);
4282 if (n_ivs)
4283 *n_ivs = iv_ca_n_cands (ivs);
4284 iv_ca_delta_commit (data, ivs, *delta, false);
4286 return cost;
4289 /* Try narrowing set IVS by removing CAND. Return the cost of
4290 the new set and store the differences in DELTA. */
4292 static unsigned
4293 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4294 struct iv_cand *cand, struct iv_ca_delta **delta)
4296 unsigned i, ci;
4297 struct iv_use *use;
4298 struct cost_pair *old_cp, *new_cp, *cp;
4299 bitmap_iterator bi;
4300 struct iv_cand *cnd;
4301 unsigned cost;
4303 *delta = NULL;
4304 for (i = 0; i < n_iv_uses (data); i++)
4306 use = iv_use (data, i);
4308 old_cp = iv_ca_cand_for_use (ivs, use);
4309 if (old_cp->cand != cand)
4310 continue;
4312 new_cp = NULL;
4314 if (data->consider_all_candidates)
4316 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4318 if (ci == cand->id)
4319 continue;
4321 cnd = iv_cand (data, ci);
4323 cp = get_use_iv_cost (data, use, cnd);
4324 if (!cp)
4325 continue;
4326 if (!iv_ca_has_deps (ivs, cp))
4327 continue;
4329 if (!cheaper_cost_pair (cp, new_cp))
4330 continue;
4332 new_cp = cp;
4335 else
4337 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4339 if (ci == cand->id)
4340 continue;
4342 cnd = iv_cand (data, ci);
4344 cp = get_use_iv_cost (data, use, cnd);
4345 if (!cp)
4346 continue;
4347 if (!iv_ca_has_deps (ivs, cp))
4348 continue;
4350 if (!cheaper_cost_pair (cp, new_cp))
4351 continue;
4353 new_cp = cp;
4357 if (!new_cp)
4359 iv_ca_delta_free (delta);
4360 return INFTY;
4363 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4366 iv_ca_delta_commit (data, ivs, *delta, true);
4367 cost = iv_ca_cost (ivs);
4368 iv_ca_delta_commit (data, ivs, *delta, false);
4370 return cost;
4373 /* Try optimizing the set of candidates IVS by removing candidates other
4374 than EXCEPT_CAND from it. Return the cost of the new set, and store the
4375 differences in DELTA. */
4377 static unsigned
4378 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4379 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4381 bitmap_iterator bi;
4382 struct iv_ca_delta *act_delta, *best_delta;
4383 unsigned i, best_cost, acost;
4384 struct iv_cand *cand;
4386 best_delta = NULL;
4387 best_cost = iv_ca_cost (ivs);
4389 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4391 cand = iv_cand (data, i);
4393 if (cand == except_cand)
4394 continue;
4396 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4398 if (acost < best_cost)
4400 best_cost = acost;
4401 iv_ca_delta_free (&best_delta);
4402 best_delta = act_delta;
4404 else
4405 iv_ca_delta_free (&act_delta);
4408 if (!best_delta)
4410 *delta = NULL;
4411 return best_cost;
4414 /* Recurse to possibly remove other unnecessary ivs. */
4415 iv_ca_delta_commit (data, ivs, best_delta, true);
4416 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4417 iv_ca_delta_commit (data, ivs, best_delta, false);
4418 *delta = iv_ca_delta_join (best_delta, *delta);
4419 return best_cost;
4422 /* Tries to extend the set IVS in the best possible way in order
4423 to express USE. */
4425 static bool
4426 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4427 struct iv_use *use)
4429 unsigned best_cost, act_cost;
4430 unsigned i;
4431 bitmap_iterator bi;
4432 struct iv_cand *cand;
4433 struct iv_ca_delta *best_delta = NULL, *act_delta;
4434 struct cost_pair *cp;
4436 iv_ca_add_use (data, ivs, use);
4437 best_cost = iv_ca_cost (ivs);
4439 cp = iv_ca_cand_for_use (ivs, use);
4440 if (cp)
4442 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4443 iv_ca_set_no_cp (data, ivs, use);
4446 /* First try important candidates. Only if that fails, try the specific ones.
4447 Rationale -- in loops with many variables the best choice often is to use
4448 just one generic biv. If we added many ivs specific to the uses here,
4449 the optimization algorithm later would be likely to get stuck in a local
4450 minimum, thus causing us to create too many ivs. The approach from
4451 few ivs to more seems more likely to be successful -- starting from few
4452 ivs, replacing an expensive use by a specific iv should always be a
4453 win. */
4454 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4456 cand = iv_cand (data, i);
4458 if (iv_ca_cand_used_p (ivs, cand))
4459 continue;
4461 cp = get_use_iv_cost (data, use, cand);
4462 if (!cp)
4463 continue;
4465 iv_ca_set_cp (data, ivs, use, cp);
4466 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4467 iv_ca_set_no_cp (data, ivs, use);
4468 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4470 if (act_cost < best_cost)
4472 best_cost = act_cost;
4474 iv_ca_delta_free (&best_delta);
4475 best_delta = act_delta;
4477 else
4478 iv_ca_delta_free (&act_delta);
4481 if (best_cost == INFTY)
4483 for (i = 0; i < use->n_map_members; i++)
4485 cp = use->cost_map + i;
4486 cand = cp->cand;
4487 if (!cand)
4488 continue;
4490 /* Already tried this. */
4491 if (cand->important)
4492 continue;
4494 if (iv_ca_cand_used_p (ivs, cand))
4495 continue;
4497 act_delta = NULL;
4498 iv_ca_set_cp (data, ivs, use, cp);
4499 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4500 iv_ca_set_no_cp (data, ivs, use);
4501 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4502 cp, act_delta);
4504 if (act_cost < best_cost)
4506 best_cost = act_cost;
4508 if (best_delta)
4509 iv_ca_delta_free (&best_delta);
4510 best_delta = act_delta;
4512 else
4513 iv_ca_delta_free (&act_delta);
4517 iv_ca_delta_commit (data, ivs, best_delta, true);
4518 iv_ca_delta_free (&best_delta);
4520 return (best_cost != INFTY);
4523 /* Finds an initial assignment of candidates to uses. */
4525 static struct iv_ca *
4526 get_initial_solution (struct ivopts_data *data)
4528 struct iv_ca *ivs = iv_ca_new (data);
4529 unsigned i;
4531 for (i = 0; i < n_iv_uses (data); i++)
4532 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4534 iv_ca_free (&ivs);
4535 return NULL;
4538 return ivs;
4541 /* Tries to improve the set of induction variables IVS. */
4543 static bool
4544 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4546 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4547 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4548 struct iv_cand *cand;
4550 /* Try extending the set of induction variables by one. */
4551 for (i = 0; i < n_iv_cands (data); i++)
4553 cand = iv_cand (data, i);
4555 if (iv_ca_cand_used_p (ivs, cand))
4556 continue;
4558 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4559 if (!act_delta)
4560 continue;
4562 /* If we successfully added the candidate and the set is small enough,
4563 try optimizing it by removing other candidates. */
4564 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4566 iv_ca_delta_commit (data, ivs, act_delta, true);
4567 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4568 iv_ca_delta_commit (data, ivs, act_delta, false);
4569 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4572 if (acost < best_cost)
4574 best_cost = acost;
4575 iv_ca_delta_free (&best_delta);
4576 best_delta = act_delta;
4578 else
4579 iv_ca_delta_free (&act_delta);
4582 if (!best_delta)
4584 /* Try removing the candidates from the set instead. */
4585 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4587 /* Nothing more we can do. */
4588 if (!best_delta)
4589 return false;
4592 iv_ca_delta_commit (data, ivs, best_delta, true);
4593 gcc_assert (best_cost == iv_ca_cost (ivs));
4594 iv_ca_delta_free (&best_delta);
4595 return true;
4598 /* Attempts to find the optimal set of induction variables. We use a simple
4599 greedy heuristic -- we try to replace at most one candidate in the selected
4600 solution and remove the unused ivs as long as this improves the cost. */
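/* A hypothetical illustration of one improvement step: if the current set
   is {i} and an address use a[i] would be cheaper through a pointer
   candidate p stepping by sizeof (*a), the step may add p and, should i
   then become redundant for every remaining use, prune i as well.  */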
4602 static struct iv_ca *
4603 find_optimal_iv_set (struct ivopts_data *data)
4605 unsigned i;
4606 struct iv_ca *set;
4607 struct iv_use *use;
4609 /* Get the initial solution. */
4610 set = get_initial_solution (data);
4611 if (!set)
4613 if (dump_file && (dump_flags & TDF_DETAILS))
4614 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4615 return NULL;
4618 if (dump_file && (dump_flags & TDF_DETAILS))
4620 fprintf (dump_file, "Initial set of candidates:\n");
4621 iv_ca_dump (data, dump_file, set);
4624 while (try_improve_iv_set (data, set))
4626 if (dump_file && (dump_flags & TDF_DETAILS))
4628 fprintf (dump_file, "Improved to:\n");
4629 iv_ca_dump (data, dump_file, set);
4633 if (dump_file && (dump_flags & TDF_DETAILS))
4634 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
4636 for (i = 0; i < n_iv_uses (data); i++)
4638 use = iv_use (data, i);
4639 use->selected = iv_ca_cand_for_use (set, use)->cand;
4642 return set;
4645 /* Creates a new induction variable corresponding to CAND. */
4647 static void
4648 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4650 block_stmt_iterator incr_pos;
4651 tree base;
4652 bool after = false;
4654 if (!cand->iv)
4655 return;
4657 switch (cand->pos)
4659 case IP_NORMAL:
4660 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4661 break;
4663 case IP_END:
4664 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4665 after = true;
4666 break;
4668 case IP_ORIGINAL:
4669 /* Mark that the iv is preserved. */
4670 name_info (data, cand->var_before)->preserve_biv = true;
4671 name_info (data, cand->var_after)->preserve_biv = true;
4673 /* Rewrite the increment so that it uses var_before directly. */
4674 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4676 return;
4679 gimple_add_tmp_var (cand->var_before);
4680 add_referenced_tmp_var (cand->var_before);
4682 base = unshare_expr (cand->iv->base);
4684 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
4685 &incr_pos, after, &cand->var_before, &cand->var_after);
4688 /* Creates new induction variables described in SET. */
4690 static void
4691 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4693 unsigned i;
4694 struct iv_cand *cand;
4695 bitmap_iterator bi;
4697 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4699 cand = iv_cand (data, i);
4700 create_new_iv (data, cand);
4704 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4705 is true, also remove the ssa name defined by the statement. */
4707 static void
4708 remove_statement (tree stmt, bool including_defined_name)
4710 if (TREE_CODE (stmt) == PHI_NODE)
4712 if (!including_defined_name)
4714 /* Prevent the ssa name defined by the statement from being removed. */
4715 SET_PHI_RESULT (stmt, NULL);
4717 remove_phi_node (stmt, NULL_TREE);
4719 else
4721 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4723 bsi_remove (&bsi);
4727 /* Rewrites USE (the definition of an iv used in a nonlinear expression)
4728 using candidate CAND. */
4730 static void
4731 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4732 struct iv_use *use, struct iv_cand *cand)
4734 tree comp;
4735 tree op, stmts, tgt, ass;
4736 block_stmt_iterator bsi, pbsi;
4738 /* An important special case -- if we are asked to express the value of
4739 the original iv by itself, just exit; there is no need to
4740 introduce a new computation (that might also need casting the
4741 variable to unsigned and back). */
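/* A hypothetical GIMPLE example: when CAND is the original biv and USE is
   its own increment,

     i_2 = i_1 + 1;

   the statement already has the desired shape and is left alone; only an
   increment written in a more complicated way is re-expressed below.  */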
4742 if (cand->pos == IP_ORIGINAL
4743 && TREE_CODE (use->stmt) == MODIFY_EXPR
4744 && TREE_OPERAND (use->stmt, 0) == cand->var_after)
4746 op = TREE_OPERAND (use->stmt, 1);
4748 /* Be a bit careful. In case the variable is expressed in some
4749 complicated way, rewrite it so that we may get rid of this
4750 complicated expression. */
4751 if ((TREE_CODE (op) == PLUS_EXPR
4752 || TREE_CODE (op) == MINUS_EXPR)
4753 && TREE_OPERAND (op, 0) == cand->var_before
4754 && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
4755 return;
4758 comp = unshare_expr (get_computation (data->current_loop,
4759 use, cand));
4760 switch (TREE_CODE (use->stmt))
4762 case PHI_NODE:
4763 tgt = PHI_RESULT (use->stmt);
4765 /* If we should keep the biv, do not replace it. */
4766 if (name_info (data, tgt)->preserve_biv)
4767 return;
4769 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4770 while (!bsi_end_p (pbsi)
4771 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4773 bsi = pbsi;
4774 bsi_next (&pbsi);
4776 break;
4778 case MODIFY_EXPR:
4779 tgt = TREE_OPERAND (use->stmt, 0);
4780 bsi = bsi_for_stmt (use->stmt);
4781 break;
4783 default:
4784 gcc_unreachable ();
4787 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4789 if (TREE_CODE (use->stmt) == PHI_NODE)
4791 if (stmts)
4792 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4793 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
4794 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4795 remove_statement (use->stmt, false);
4796 SSA_NAME_DEF_STMT (tgt) = ass;
4798 else
4800 if (stmts)
4801 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4802 TREE_OPERAND (use->stmt, 1) = op;
4806 /* Replaces ssa name in index IDX by its basic variable. Callback for
4807 for_each_index. */
4809 static bool
4810 idx_remove_ssa_names (tree base, tree *idx,
4811 void *data ATTRIBUTE_UNUSED)
4813 tree *op;
4815 if (TREE_CODE (*idx) == SSA_NAME)
4816 *idx = SSA_NAME_VAR (*idx);
4818 if (TREE_CODE (base) == ARRAY_REF)
4820 op = &TREE_OPERAND (base, 2);
4821 if (*op
4822 && TREE_CODE (*op) == SSA_NAME)
4823 *op = SSA_NAME_VAR (*op);
4824 op = &TREE_OPERAND (base, 3);
4825 if (*op
4826 && TREE_CODE (*op) == SSA_NAME)
4827 *op = SSA_NAME_VAR (*op);
4830 return true;
4833 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4835 static tree
4836 unshare_and_remove_ssa_names (tree ref)
4838 ref = unshare_expr (ref);
4839 for_each_index (&ref, idx_remove_ssa_names, NULL);
4841 return ref;
4844 /* Rewrites base of memory access OP with expression WITH in statement
4845 pointed to by BSI. */
4847 static void
4848 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
4850 tree bvar, var, new_name, copy, name;
4851 tree orig;
4853 var = bvar = get_base_address (*op);
4855 if (!var || TREE_CODE (with) != SSA_NAME)
4856 goto do_rewrite;
4858 gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
4859 gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
4860 if (TREE_CODE (var) == INDIRECT_REF)
4861 var = TREE_OPERAND (var, 0);
4862 if (TREE_CODE (var) == SSA_NAME)
4864 name = var;
4865 var = SSA_NAME_VAR (var);
4867 else if (DECL_P (var))
4868 name = NULL_TREE;
4869 else
4870 goto do_rewrite;
4872 /* We need to add a memory tag for the variable. But we do not want
4873 to add it to the temporary used for the computations, since this leads
4874 to problems in redundancy elimination when there are common parts
4875 in two computations referring to different arrays. So we copy
4876 the variable to a new temporary. */
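/* A rough, invented example: when a[i] and b[i] lead to address
   computations with a common part, redundancy elimination may want to
   reuse that shared value; putting one array's memory tag on it would get
   in the way, so the tag is attached to a fresh copy ("ruatmp" below)
   instead.  */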
4877 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
4879 if (name)
4880 new_name = duplicate_ssa_name (name, copy);
4881 else
4883 tree tag = var_ann (var)->type_mem_tag;
4884 tree new_ptr = create_tmp_var (TREE_TYPE (with), "ruatmp");
4885 add_referenced_tmp_var (new_ptr);
4886 if (tag)
4887 var_ann (new_ptr)->type_mem_tag = tag;
4888 else
4889 add_type_alias (new_ptr, var);
4890 new_name = make_ssa_name (new_ptr, copy);
4893 TREE_OPERAND (copy, 0) = new_name;
4894 update_stmt (copy);
4895 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
4896 with = new_name;
4898 do_rewrite:
4900 orig = NULL_TREE;
4901 gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
4902 gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
4904 if (TREE_CODE (*op) == INDIRECT_REF)
4905 orig = REF_ORIGINAL (*op);
4906 if (!orig)
4907 orig = unshare_and_remove_ssa_names (*op);
4909 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
4911 /* Record the original reference, for purposes of alias analysis. */
4912 REF_ORIGINAL (*op) = orig;
4914 /* Virtual operands in the original statement may have to be renamed
4915 because of the replacement. */
4916 mark_new_vars_to_rename (bsi_stmt (*bsi));
4919 /* Rewrites USE (address that is an iv) using candidate CAND. */
4921 static void
4922 rewrite_use_address (struct ivopts_data *data,
4923 struct iv_use *use, struct iv_cand *cand)
4925 tree comp = unshare_expr (get_computation (data->current_loop,
4926 use, cand));
4927 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4928 tree stmts;
4929 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
4931 if (stmts)
4932 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4934 rewrite_address_base (&bsi, use->op_p, op);
4937 /* Rewrites USE (a condition in which one of the operands is an iv) using
4938 candidate CAND. */
4940 static void
4941 rewrite_use_compare (struct ivopts_data *data,
4942 struct iv_use *use, struct iv_cand *cand)
4944 tree comp;
4945 tree *op_p, cond, op, stmts, bound;
4946 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4947 enum tree_code compare;
4948 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
4950 bound = cp->value;
4951 if (bound)
4953 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
4954 tree var_type = TREE_TYPE (var);
4956 compare = iv_elimination_compare (data, use);
4957 bound = fold_convert (var_type, bound);
4958 op = force_gimple_operand (unshare_expr (bound), &stmts,
4959 true, NULL_TREE);
4961 if (stmts)
4962 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4964 *use->op_p = build2 (compare, boolean_type_node, var, op);
4965 update_stmt (use->stmt);
4966 return;
4969 /* The induction variable elimination failed; just express the original
4970 giv. */
4971 comp = unshare_expr (get_computation (data->current_loop, use, cand));
4973 cond = *use->op_p;
4974 op_p = &TREE_OPERAND (cond, 0);
4975 if (TREE_CODE (*op_p) != SSA_NAME
4976 || zero_p (get_iv (data, *op_p)->step))
4977 op_p = &TREE_OPERAND (cond, 1);
4979 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
4980 if (stmts)
4981 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4983 *op_p = op;
4986 /* Ensure that operand *OP_P may be used at the end of EXIT without
4987 violating loop closed ssa form. */
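/* For reference: in loop closed ssa form, a name defined inside a loop and
   used after it must flow through a phi node in the block the exit edge
   leads to, e.g.

     i_3 = PHI <i_2 (exit edge)>;

   with uses outside the loop reading i_3 rather than i_2.  */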
4989 static void
4990 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
4992 basic_block def_bb;
4993 struct loop *def_loop;
4994 tree phi, use;
4996 use = USE_FROM_PTR (op_p);
4997 if (TREE_CODE (use) != SSA_NAME)
4998 return;
5000 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5001 if (!def_bb)
5002 return;
5004 def_loop = def_bb->loop_father;
5005 if (flow_bb_inside_loop_p (def_loop, exit->dest))
5006 return;
5008 /* Try finding a phi node that copies the value out of the loop. */
5009 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5010 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5011 break;
5013 if (!phi)
5015 /* Create such a phi node. */
5016 tree new_name = duplicate_ssa_name (use, NULL);
5018 phi = create_phi_node (new_name, exit->dest);
5019 SSA_NAME_DEF_STMT (new_name) = phi;
5020 add_phi_arg (phi, use, exit);
5023 SET_USE (op_p, PHI_RESULT (phi));
5026 /* Ensure that operands of STMT may be used at the end of EXIT without
5027 violating loop closed ssa form. */
5029 static void
5030 protect_loop_closed_ssa_form (edge exit, tree stmt)
5032 use_optype uses;
5033 vuse_optype vuses;
5034 v_may_def_optype v_may_defs;
5035 unsigned i;
5037 uses = STMT_USE_OPS (stmt);
5038 for (i = 0; i < NUM_USES (uses); i++)
5039 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
5041 vuses = STMT_VUSE_OPS (stmt);
5042 for (i = 0; i < NUM_VUSES (vuses); i++)
5043 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
5045 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5046 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
5047 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
5050 /* STMTS compute the value of a phi argument OP on EXIT of a loop. Arrange
5051 things so that they are emitted in the correct place, and so that the loop
5052 closed ssa form is preserved. */
5054 static void
5055 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5057 tree_stmt_iterator tsi;
5058 block_stmt_iterator bsi;
5059 tree phi, stmt, def, next;
5061 if (!single_pred_p (exit->dest))
5062 split_loop_exit_edge (exit);
5064 /* Ensure there is a label in exit->dest, so that we can
5065 insert after it. */
5066 tree_block_label (exit->dest);
5067 bsi = bsi_after_labels (exit->dest);
5069 if (TREE_CODE (stmts) == STATEMENT_LIST)
5071 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5073 bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
5074 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5077 else
5079 bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
5080 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5083 if (!op)
5084 return;
5086 for (phi = phi_nodes (exit->dest); phi; phi = next)
5088 next = PHI_CHAIN (phi);
5090 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5092 def = PHI_RESULT (phi);
5093 remove_statement (phi, false);
5094 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5095 def, op);
5096 SSA_NAME_DEF_STMT (def) = stmt;
5097 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5102 /* Rewrites the final value of USE (that is only needed outside of the loop)
5103 using candidate CAND. */
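/* A hypothetical example: if the loop counts i from 0 to n and the only
   remaining use of i is a copy executed after the loop, that copy can be
   fed the value i has at the single exit (n here), so the iv itself need
   not be kept live past the loop.  */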
5105 static void
5106 rewrite_use_outer (struct ivopts_data *data,
5107 struct iv_use *use, struct iv_cand *cand)
5109 edge exit;
5110 tree value, op, stmts, tgt;
5111 tree phi;
5113 switch (TREE_CODE (use->stmt))
5115 case PHI_NODE:
5116 tgt = PHI_RESULT (use->stmt);
5117 break;
5118 case MODIFY_EXPR:
5119 tgt = TREE_OPERAND (use->stmt, 0);
5120 break;
5121 default:
5122 gcc_unreachable ();
5125 exit = single_dom_exit (data->current_loop);
5127 if (exit)
5129 if (!cand->iv)
5131 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5132 value = cp->value;
5134 else
5135 value = get_computation_at (data->current_loop,
5136 use, cand, last_stmt (exit->src));
5138 value = unshare_expr (value);
5139 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5141 /* If we will preserve the iv anyway and we would need to perform
5142 some computation to replace the final value, do nothing. */
5143 if (stmts && name_info (data, tgt)->preserve_biv)
5144 return;
5146 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5148 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5150 if (USE_FROM_PTR (use_p) == tgt)
5151 SET_USE (use_p, op);
5154 if (stmts)
5155 compute_phi_arg_on_exit (exit, stmts, op);
5157 /* Enable removal of the statement. We cannot remove it directly,
5158 since we may still need the aliasing information attached to the
5159 ssa name defined by it. */
5160 name_info (data, tgt)->iv->have_use_for = false;
5161 return;
5164 /* If the variable is going to be preserved anyway, there is nothing to
5165 do. */
5166 if (name_info (data, tgt)->preserve_biv)
5167 return;
5169 /* Otherwise we just need to compute the iv. */
5170 rewrite_use_nonlinear_expr (data, use, cand);
5173 /* Rewrites USE using candidate CAND. */
5175 static void
5176 rewrite_use (struct ivopts_data *data,
5177 struct iv_use *use, struct iv_cand *cand)
5179 switch (use->type)
5181 case USE_NONLINEAR_EXPR:
5182 rewrite_use_nonlinear_expr (data, use, cand);
5183 break;
5185 case USE_OUTER:
5186 rewrite_use_outer (data, use, cand);
5187 break;
5189 case USE_ADDRESS:
5190 rewrite_use_address (data, use, cand);
5191 break;
5193 case USE_COMPARE:
5194 rewrite_use_compare (data, use, cand);
5195 break;
5197 default:
5198 gcc_unreachable ();
5200 update_stmt (use->stmt);
5203 /* Rewrite the uses using the selected induction variables. */
5205 static void
5206 rewrite_uses (struct ivopts_data *data)
5208 unsigned i;
5209 struct iv_cand *cand;
5210 struct iv_use *use;
5212 for (i = 0; i < n_iv_uses (data); i++)
5214 use = iv_use (data, i);
5215 cand = use->selected;
5216 gcc_assert (cand);
5218 rewrite_use (data, use, cand);
5222 /* Removes the ivs that are not used after rewriting. */
5224 static void
5225 remove_unused_ivs (struct ivopts_data *data)
5227 unsigned j;
5228 bitmap_iterator bi;
5230 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5232 struct version_info *info;
5234 info = ver_info (data, j);
5235 if (info->iv
5236 && !zero_p (info->iv->step)
5237 && !info->inv_id
5238 && !info->iv->have_use_for
5239 && !info->preserve_biv)
5240 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5244 /* Frees data allocated by the optimization of a single loop. */
5246 static void
5247 free_loop_data (struct ivopts_data *data)
5249 unsigned i, j;
5250 bitmap_iterator bi;
5251 tree obj;
5253 htab_empty (data->niters);
5255 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5257 struct version_info *info;
5259 info = ver_info (data, i);
5260 if (info->iv)
5261 free (info->iv);
5262 info->iv = NULL;
5263 info->has_nonlin_use = false;
5264 info->preserve_biv = false;
5265 info->inv_id = 0;
5267 bitmap_clear (data->relevant);
5268 bitmap_clear (data->important_candidates);
5270 for (i = 0; i < n_iv_uses (data); i++)
5272 struct iv_use *use = iv_use (data, i);
5274 free (use->iv);
5275 BITMAP_FREE (use->related_cands);
5276 for (j = 0; j < use->n_map_members; j++)
5277 if (use->cost_map[j].depends_on)
5278 BITMAP_FREE (use->cost_map[j].depends_on);
5279 free (use->cost_map);
5280 free (use);
5282 VEC_truncate (iv_use_p, data->iv_uses, 0);
5284 for (i = 0; i < n_iv_cands (data); i++)
5286 struct iv_cand *cand = iv_cand (data, i);
5288 if (cand->iv)
5289 free (cand->iv);
5290 free (cand);
5292 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5294 if (data->version_info_size < num_ssa_names)
5296 data->version_info_size = 2 * num_ssa_names;
5297 free (data->version_info);
5298 data->version_info = xcalloc (data->version_info_size,
5299 sizeof (struct version_info));
5302 data->max_inv_id = 0;
5304 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5305 SET_DECL_RTL (obj, NULL_RTX);
5307 VEC_truncate (tree, decl_rtl_to_reset, 0);
5310 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5311 loop tree. */
5313 static void
5314 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5316 unsigned i;
5318 for (i = 1; i < loops->num; i++)
5319 if (loops->parray[i])
5321 free (loops->parray[i]->aux);
5322 loops->parray[i]->aux = NULL;
5325 free_loop_data (data);
5326 free (data->version_info);
5327 BITMAP_FREE (data->relevant);
5328 BITMAP_FREE (data->important_candidates);
5329 htab_delete (data->niters);
5331 VEC_free (tree, heap, decl_rtl_to_reset);
5332 VEC_free (iv_use_p, heap, data->iv_uses);
5333 VEC_free (iv_cand_p, heap, data->iv_candidates);
5336 /* Optimizes the LOOP. Returns true if anything changed. */
5338 static bool
5339 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5341 bool changed = false;
5342 struct iv_ca *iv_ca;
5343 edge exit;
5345 data->current_loop = loop;
5347 if (dump_file && (dump_flags & TDF_DETAILS))
5349 fprintf (dump_file, "Processing loop %d\n", loop->num);
5351 exit = single_dom_exit (loop);
5352 if (exit)
5354 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5355 exit->src->index, exit->dest->index);
5356 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5357 fprintf (dump_file, "\n");
5360 fprintf (dump_file, "\n");
5363 /* For each ssa name, determine whether it behaves as an induction variable
5364 in some loop. */
5365 if (!find_induction_variables (data))
5366 goto finish;
5368 /* Finds interesting uses (item 1). */
5369 find_interesting_uses (data);
5370 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5371 goto finish;
5373 /* Finds candidates for the induction variables (item 2). */
5374 find_iv_candidates (data);
5376 /* Calculates the costs (item 3, part 1). */
5377 determine_use_iv_costs (data);
5378 determine_iv_costs (data);
5379 determine_set_costs (data);
5381 /* Find the optimal set of induction variables (item 3, part 2). */
5382 iv_ca = find_optimal_iv_set (data);
5383 if (!iv_ca)
5384 goto finish;
5385 changed = true;
5387 /* Create the new induction variables (item 4, part 1). */
5388 create_new_ivs (data, iv_ca);
5389 iv_ca_free (&iv_ca);
5391 /* Rewrite the uses (item 4, part 2). */
5392 rewrite_uses (data);
5394 /* Remove the ivs that are unused after rewriting. */
5395 remove_unused_ivs (data);
5397 /* We have changed the structure of induction variables; it might happen
5398 that definitions in the scev database refer to some of them that were
5399 eliminated. */
5400 scev_reset ();
5402 finish:
5403 free_loop_data (data);
5405 return changed;
5408 /* Main entry point. Optimizes induction variables in LOOPS. */
5410 void
5411 tree_ssa_iv_optimize (struct loops *loops)
5413 struct loop *loop;
5414 struct ivopts_data data;
5416 tree_ssa_iv_optimize_init (loops, &data);
5418 /* Optimize the loops starting with the innermost ones. */
5419 loop = loops->tree_root;
5420 while (loop->inner)
5421 loop = loop->inner;
5423 /* Scan the loops, inner ones first. */
5424 while (loop != loops->tree_root)
5426 if (dump_file && (dump_flags & TDF_DETAILS))
5427 flow_loop_dump (loop, dump_file, NULL, 1);
5429 tree_ssa_iv_optimize_loop (&data, loop);
5431 if (loop->next)
5433 loop = loop->next;
5434 while (loop->inner)
5435 loop = loop->inner;
5437 else
5438 loop = loop->outer;
5441 /* FIXME. IV opts introduces new aliases and call-clobbered
5442 variables, which need to be renamed. However, when we call the
5443 renamer, not all statements will be scanned for operands. In
5444 particular, the newly introduced aliases may appear in statements
5445 that are considered "unmodified", so the renamer will not get a
5446 chance to rename those operands.
5448 Work around this problem by forcing an operand re-scan on every
5449 statement. This will not be necessary once the new operand
5450 scanner is implemented. */
5451 if (need_ssa_update_p ())
5453 basic_block bb;
5454 block_stmt_iterator si;
5455 FOR_EACH_BB (bb)
5456 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5457 update_stmt (bsi_stmt (si));
5460 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
5461 tree_ssa_iv_optimize_finalize (loops, &data);