[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does so in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r.t. a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variable's value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible; it might give better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
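/* As a rough illustration of the above (a sketch of the intended effect,
   not a transcript of what this pass emits on any particular target),
   a loop such as

       for (i = 0; i < n; i++)
	 a[i] = 0;

   may end up being rewritten along the lines of

       for (p = &a[0]; p != &a[n]; p++)
	 *p = 0;

   i.e. the multiplication hidden in the address of a[i] is strength-reduced
   into the pointer increment, the counter i is eliminated, and the exit
   test compares the remaining iv against a precomputed bound.  The actual
   choice depends on the costs determined below.  */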
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
91 #include "langhooks.h"
93 /* The infinite cost. */
94 #define INFTY 10000000
96 /* The expected number of loop iterations. TODO -- use profiling instead of
97 this. */
98 #define AVG_LOOP_NITER(LOOP) 5
101 /* Representation of the induction variable. */
102 struct iv
104 tree base; /* Initial value of the iv. */
105 tree base_object; /* A memory object to which the induction variable points. */
106 tree step; /* Step of the iv (constant only). */
107 tree ssa_name; /* The ssa name with the value. */
108 bool biv_p; /* Is it a biv? */
109 bool have_use_for; /* Do we already have a use for it? */
110 unsigned use_id; /* The identifier in the use if it is the case. */
113 /* Per-ssa version information (induction variable descriptions, etc.). */
114 struct version_info
116 tree name; /* The ssa name. */
117 struct iv *iv; /* Induction variable description. */
118 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id; /* Id of an invariant. */
121 bool preserve_biv; /* For the original biv, whether to preserve it. */
124 /* Information attached to loop. */
125 struct loop_data
127 unsigned regs_used; /* Number of registers used. */
130 /* Types of uses. */
131 enum use_type
133 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
134 USE_OUTER, /* The induction variable is used outside the loop. */
135 USE_ADDRESS, /* Use in an address. */
136 USE_COMPARE /* Use is a compare. */
139 /* The candidate - cost pair. */
140 struct cost_pair
142 struct iv_cand *cand; /* The candidate. */
143 unsigned cost; /* The cost. */
144 bitmap depends_on; /* The list of invariants that have to be
145 preserved. */
146 tree value; /* For final value elimination, the expression for
147 the final value of the iv. For iv elimination,
148 the new bound to compare with. */
151 /* Use. */
152 struct iv_use
154 unsigned id; /* The id of the use. */
155 enum use_type type; /* Type of the use. */
156 struct iv *iv; /* The induction variable it is based on. */
157 tree stmt; /* Statement in which it occurs. */
158 tree *op_p; /* The place where it occurs. */
159 bitmap related_cands; /* The set of "related" iv candidates, plus the common
160 important ones. */
162 unsigned n_map_members; /* Number of candidates in the cost_map list. */
163 struct cost_pair *cost_map;
164 /* The costs with respect to the iv candidates. */
166 struct iv_cand *selected;
167 /* The selected candidate. */
170 /* The position where the iv is computed. */
171 enum iv_position
173 IP_NORMAL, /* At the end, just before the exit condition. */
174 IP_END, /* At the end of the latch block. */
175 IP_ORIGINAL /* The original biv. */
178 /* The induction variable candidate. */
179 struct iv_cand
181 unsigned id; /* The number of the candidate. */
182 bool important; /* Whether this is an "important" candidate, i.e. such
183 that it should be considered by all uses. */
184 enum iv_position pos; /* Where it is computed. */
185 tree incremented_at; /* For original biv, the statement where it is
186 incremented. */
187 tree var_before; /* The variable used for it before increment. */
188 tree var_after; /* The variable used for it after increment. */
189 struct iv *iv; /* The value of the candidate. NULL for
190 "pseudocandidate" used to indicate the possibility
191 to replace the final value of an iv by direct
192 computation of the value. */
193 unsigned cost; /* Cost of the candidate. */
196 /* The data used by the induction variable optimizations. */
198 typedef struct iv_use *iv_use_p;
199 DEF_VEC_P(iv_use_p);
200 DEF_VEC_ALLOC_P(iv_use_p,heap);
202 typedef struct iv_cand *iv_cand_p;
203 DEF_VEC_P(iv_cand_p);
204 DEF_VEC_ALLOC_P(iv_cand_p,heap);
206 struct ivopts_data
208 /* The currently optimized loop. */
209 struct loop *current_loop;
211 /* Numbers of iterations for all exits of the current loop. */
212 htab_t niters;
214 /* The size of version_info array allocated. */
215 unsigned version_info_size;
217 /* The array of information for the ssa names. */
218 struct version_info *version_info;
220 /* The bitmap of indices in version_info whose value was changed. */
221 bitmap relevant;
223 /* The maximum invariant id. */
224 unsigned max_inv_id;
226 /* The uses of induction variables. */
227 VEC(iv_use_p,heap) *iv_uses;
229 /* The candidates. */
230 VEC(iv_cand_p,heap) *iv_candidates;
232 /* A bitmap of important candidates. */
233 bitmap important_candidates;
235 /* Whether to consider just related and important candidates when replacing a
236 use. */
237 bool consider_all_candidates;
240 /* An assignment of iv candidates to uses. */
242 struct iv_ca
244 /* The number of uses covered by the assignment. */
245 unsigned upto;
247 /* Number of uses that cannot be expressed by the candidates in the set. */
248 unsigned bad_uses;
250 /* Candidate assigned to a use, together with the related costs. */
251 struct cost_pair **cand_for_use;
253 /* Number of times each candidate is used. */
254 unsigned *n_cand_uses;
256 /* The candidates used. */
257 bitmap cands;
259 /* The number of candidates in the set. */
260 unsigned n_cands;
262 /* Total number of registers needed. */
263 unsigned n_regs;
265 /* Total cost of expressing uses. */
266 unsigned cand_use_cost;
268 /* Total cost of candidates. */
269 unsigned cand_cost;
271 /* Number of times each invariant is used. */
272 unsigned *n_invariant_uses;
274 /* Total cost of the assignment. */
275 unsigned cost;
278 /* Difference of two iv candidate assignments. */
280 struct iv_ca_delta
282 /* Changed use. */
283 struct iv_use *use;
285 /* An old assignment (for rollback purposes). */
286 struct cost_pair *old_cp;
288 /* A new assignment. */
289 struct cost_pair *new_cp;
291 /* Next change in the list. */
292 struct iv_ca_delta *next_change;
295 /* Bound on the number of candidates below which all candidates are considered. */
297 #define CONSIDER_ALL_CANDIDATES_BOUND \
298 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
300 /* If there are more iv occurrences, we just give up (it is quite unlikely that
301 optimizing such a loop would help, and it would take ages). */
303 #define MAX_CONSIDERED_USES \
304 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
306 /* If there are at most this number of ivs in the set, always try removing
307 unnecessary ivs from the set. */
309 #define ALWAYS_PRUNE_CAND_SET_BOUND \
310 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
312 /* The list of trees for which the decl_rtl field must be reset is stored
313 here. */
315 static VEC(tree,heap) *decl_rtl_to_reset;
317 /* Number of uses recorded in DATA. */
319 static inline unsigned
320 n_iv_uses (struct ivopts_data *data)
322 return VEC_length (iv_use_p, data->iv_uses);
325 /* Ith use recorded in DATA. */
327 static inline struct iv_use *
328 iv_use (struct ivopts_data *data, unsigned i)
330 return VEC_index (iv_use_p, data->iv_uses, i);
333 /* Number of candidates recorded in DATA. */
335 static inline unsigned
336 n_iv_cands (struct ivopts_data *data)
338 return VEC_length (iv_cand_p, data->iv_candidates);
341 /* Ith candidate recorded in DATA. */
343 static inline struct iv_cand *
344 iv_cand (struct ivopts_data *data, unsigned i)
346 return VEC_index (iv_cand_p, data->iv_candidates, i);
349 /* The data for LOOP. */
351 static inline struct loop_data *
352 loop_data (struct loop *loop)
354 return loop->aux;
357 /* The single loop exit if it dominates the latch, NULL otherwise. */
359 static edge
360 single_dom_exit (struct loop *loop)
362 edge exit = loop->single_exit;
364 if (!exit)
365 return NULL;
367 if (!just_once_each_iteration_p (loop, exit->src))
368 return NULL;
370 return exit;
373 /* Dumps information about the induction variable IV to FILE. */
375 extern void dump_iv (FILE *, struct iv *);
376 void
377 dump_iv (FILE *file, struct iv *iv)
379 if (iv->ssa_name)
381 fprintf (file, "ssa name ");
382 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
383 fprintf (file, "\n");
386 fprintf (file, " type ");
387 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
388 fprintf (file, "\n");
390 if (iv->step)
392 fprintf (file, " base ");
393 print_generic_expr (file, iv->base, TDF_SLIM);
394 fprintf (file, "\n");
396 fprintf (file, " step ");
397 print_generic_expr (file, iv->step, TDF_SLIM);
398 fprintf (file, "\n");
400 else
402 fprintf (file, " invariant ");
403 print_generic_expr (file, iv->base, TDF_SLIM);
404 fprintf (file, "\n");
407 if (iv->base_object)
409 fprintf (file, " base object ");
410 print_generic_expr (file, iv->base_object, TDF_SLIM);
411 fprintf (file, "\n");
414 if (iv->biv_p)
415 fprintf (file, " is a biv\n");
418 /* Dumps information about the USE to FILE. */
420 extern void dump_use (FILE *, struct iv_use *);
421 void
422 dump_use (FILE *file, struct iv_use *use)
424 fprintf (file, "use %d\n", use->id);
426 switch (use->type)
428 case USE_NONLINEAR_EXPR:
429 fprintf (file, " generic\n");
430 break;
432 case USE_OUTER:
433 fprintf (file, " outside\n");
434 break;
436 case USE_ADDRESS:
437 fprintf (file, " address\n");
438 break;
440 case USE_COMPARE:
441 fprintf (file, " compare\n");
442 break;
444 default:
445 gcc_unreachable ();
448 fprintf (file, " in statement ");
449 print_generic_expr (file, use->stmt, TDF_SLIM);
450 fprintf (file, "\n");
452 fprintf (file, " at position ");
453 if (use->op_p)
454 print_generic_expr (file, *use->op_p, TDF_SLIM);
455 fprintf (file, "\n");
457 dump_iv (file, use->iv);
459 if (use->related_cands)
461 fprintf (file, " related candidates ");
462 dump_bitmap (file, use->related_cands);
466 /* Dumps information about the uses to FILE. */
468 extern void dump_uses (FILE *, struct ivopts_data *);
469 void
470 dump_uses (FILE *file, struct ivopts_data *data)
472 unsigned i;
473 struct iv_use *use;
475 for (i = 0; i < n_iv_uses (data); i++)
477 use = iv_use (data, i);
479 dump_use (file, use);
480 fprintf (file, "\n");
484 /* Dumps information about induction variable candidate CAND to FILE. */
486 extern void dump_cand (FILE *, struct iv_cand *);
487 void
488 dump_cand (FILE *file, struct iv_cand *cand)
490 struct iv *iv = cand->iv;
492 fprintf (file, "candidate %d%s\n",
493 cand->id, cand->important ? " (important)" : "");
495 if (!iv)
497 fprintf (file, " final value replacement\n");
498 return;
501 switch (cand->pos)
503 case IP_NORMAL:
504 fprintf (file, " incremented before exit test\n");
505 break;
507 case IP_END:
508 fprintf (file, " incremented at end\n");
509 break;
511 case IP_ORIGINAL:
512 fprintf (file, " original biv\n");
513 break;
516 dump_iv (file, iv);
519 /* Returns the info for ssa version VER. */
521 static inline struct version_info *
522 ver_info (struct ivopts_data *data, unsigned ver)
524 return data->version_info + ver;
527 /* Returns the info for ssa name NAME. */
529 static inline struct version_info *
530 name_info (struct ivopts_data *data, tree name)
532 return ver_info (data, SSA_NAME_VERSION (name));
535 /* Checks whether there exists a number X such that X * B = A, counting modulo
536 2^BITS. */
538 static bool
539 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
540 HOST_WIDE_INT *x)
542 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
543 unsigned HOST_WIDE_INT inv, ex, val;
544 unsigned i;
546 a &= mask;
547 b &= mask;
549 /* First divide the whole equation by 2 as long as possible. */
550 while (!(a & 1) && !(b & 1))
552 a >>= 1;
553 b >>= 1;
554 bits--;
555 mask >>= 1;
558 if (!(b & 1))
560 /* If b is still even, a is odd and there is no such x. */
561 return false;
564 /* Find the inverse of b.  Since b is odd here, b^(2^(bits - 1)) = 1 (mod 2^bits)
565 by Euler's theorem, so the inverse is b^(2^(bits - 1) - 1) (mod 2^bits). */
566 inv = 1;
567 ex = b;
568 for (i = 0; i < bits - 1; i++)
570 inv = (inv * ex) & mask;
571 ex = (ex * ex) & mask;
574 val = (a * inv) & mask;
576 gcc_assert (((val * b) & mask) == a);
578 if ((val >> (bits - 1)) & 1)
579 val |= ~mask;
581 *x = val;
583 return true;
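/* A worked example of the function above (illustration only): with BITS = 4,
   A = 6 and B = 10, both values are first halved, leaving a = 3 and b = 5
   modulo 2^3.  The inverse of 5 modulo 8 is 5 (since 5 * 5 = 25 = 1 mod 8),
   so val = 3 * 5 = 15 = 7 mod 8, which sign-extends to -1; indeed
   -1 * 10 = -10 = 6 modulo 16, so *X is set to -1 and true is returned.  */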
586 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
587 emitted in LOOP. */
589 static bool
590 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
592 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
594 gcc_assert (bb);
596 if (sbb == loop->latch)
597 return true;
599 if (sbb != bb)
600 return false;
602 return stmt == last_stmt (bb);
605 /* Returns true if STMT is after the place where the original induction
606 variable CAND is incremented. */
608 static bool
609 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
611 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
612 basic_block stmt_bb = bb_for_stmt (stmt);
613 block_stmt_iterator bsi;
615 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
616 return false;
618 if (stmt_bb != cand_bb)
619 return true;
621 /* Scan the block from the end, since the original ivs are usually
622 incremented at the end of the loop body. */
623 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
625 if (bsi_stmt (bsi) == cand->incremented_at)
626 return false;
627 if (bsi_stmt (bsi) == stmt)
628 return true;
632 /* Returns true if STMT is after the place where the induction variable
633 CAND is incremented in LOOP. */
635 static bool
636 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
638 switch (cand->pos)
640 case IP_END:
641 return false;
643 case IP_NORMAL:
644 return stmt_after_ip_normal_pos (loop, stmt);
646 case IP_ORIGINAL:
647 return stmt_after_ip_original_pos (cand, stmt);
649 default:
650 gcc_unreachable ();
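/* For illustration (a sketch assuming the usual loop shape, not taken from
   this file): in a loop of the form

       header:	i_1 = PHI <0 (preheader), i_2 (latch)>
		...
		if (i_1 < n_3) goto latch; else goto exit;
       latch:	i_2 = i_1 + 1;

   an IP_NORMAL candidate would be incremented just before the exit
   condition, an IP_END candidate at the end of the latch block, and an
   IP_ORIGINAL candidate at the statement that already increments the
   original biv (here the assignment to i_2).  */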
654 /* Element of the table in which we cache the numbers of iterations obtained
655 from exits of the loop. */
657 struct nfe_cache_elt
659 /* The edge for which the number of iterations is cached. */
660 edge exit;
662 /* True if the # of iterations was successfully determined. */
663 bool valid_p;
665 /* Description of # of iterations. */
666 struct tree_niter_desc niter;
669 /* Hash function for nfe_cache_elt E. */
671 static hashval_t
672 nfe_hash (const void *e)
674 const struct nfe_cache_elt *elt = e;
676 return htab_hash_pointer (elt->exit);
679 /* Equality function for nfe_cache_elt E1 and edge E2. */
681 static int
682 nfe_eq (const void *e1, const void *e2)
684 const struct nfe_cache_elt *elt1 = e1;
686 return elt1->exit == e2;
689 /* Returns structure describing number of iterations determined from
690 EXIT of DATA->current_loop, or NULL if something goes wrong. */
692 static struct tree_niter_desc *
693 niter_for_exit (struct ivopts_data *data, edge exit)
695 struct nfe_cache_elt *nfe_desc;
696 PTR *slot;
698 slot = htab_find_slot_with_hash (data->niters, exit,
699 htab_hash_pointer (exit),
700 INSERT);
702 if (!*slot)
704 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
705 nfe_desc->exit = exit;
706 nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
707 exit, &nfe_desc->niter);
708 *slot = nfe_desc;
710 else
711 nfe_desc = *slot;
713 if (!nfe_desc->valid_p)
714 return NULL;
716 return &nfe_desc->niter;
719 /* Returns structure describing number of iterations determined from
720 single dominating exit of DATA->current_loop, or NULL if something
721 goes wrong. */
723 static struct tree_niter_desc *
724 niter_for_single_dom_exit (struct ivopts_data *data)
726 edge exit = single_dom_exit (data->current_loop);
728 if (!exit)
729 return NULL;
731 return niter_for_exit (data, exit);
734 /* Initializes data structures used by the iv optimization pass, stored
735 in DATA. LOOPS is the loop tree. */
737 static void
738 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
740 unsigned i;
742 data->version_info_size = 2 * num_ssa_names;
743 data->version_info = xcalloc (data->version_info_size,
744 sizeof (struct version_info));
745 data->relevant = BITMAP_ALLOC (NULL);
746 data->important_candidates = BITMAP_ALLOC (NULL);
747 data->max_inv_id = 0;
748 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
750 for (i = 1; i < loops->num; i++)
751 if (loops->parray[i])
752 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
754 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
755 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
756 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
759 /* Returns a memory object to which EXPR points. If we are able to
760 determine that it does not point to any such object, NULL is returned. */
762 static tree
763 determine_base_object (tree expr)
765 enum tree_code code = TREE_CODE (expr);
766 tree base, obj, op0, op1;
768 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
769 return NULL_TREE;
771 switch (code)
773 case INTEGER_CST:
774 return NULL_TREE;
776 case ADDR_EXPR:
777 obj = TREE_OPERAND (expr, 0);
778 base = get_base_address (obj);
780 if (!base)
781 return expr;
783 if (TREE_CODE (base) == INDIRECT_REF)
784 return determine_base_object (TREE_OPERAND (base, 0));
786 return fold (build1 (ADDR_EXPR, ptr_type_node, base));
788 case PLUS_EXPR:
789 case MINUS_EXPR:
790 op0 = determine_base_object (TREE_OPERAND (expr, 0));
791 op1 = determine_base_object (TREE_OPERAND (expr, 1));
793 if (!op1)
794 return op0;
796 if (!op0)
797 return (code == PLUS_EXPR
798 ? op1
799 : fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));
801 return fold (build (code, ptr_type_node, op0, op1));
803 case NOP_EXPR:
804 case CONVERT_EXPR:
805 return determine_base_object (TREE_OPERAND (expr, 0));
807 default:
808 return fold_convert (ptr_type_node, expr);
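/* A hedged example for the function above: for an address of the form
   &a[i].f + 4, the ADDR_EXPR and PLUS_EXPR cases combine to return &a,
   whereas a pointer of unknown origin (say, one loaded from memory) falls
   through to the default case and is simply converted to void *.  */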
812 /* Allocates an induction variable with given initial value BASE and step
813 STEP. */
815 static struct iv *
816 alloc_iv (tree base, tree step)
818 struct iv *iv = xcalloc (1, sizeof (struct iv));
820 if (step && integer_zerop (step))
821 step = NULL_TREE;
823 iv->base = base;
824 iv->base_object = determine_base_object (base);
825 iv->step = step;
826 iv->biv_p = false;
827 iv->have_use_for = false;
828 iv->use_id = 0;
829 iv->ssa_name = NULL_TREE;
831 return iv;
834 /* Sets STEP and BASE for induction variable IV. */
836 static void
837 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
839 struct version_info *info = name_info (data, iv);
841 gcc_assert (!info->iv);
843 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
844 info->iv = alloc_iv (base, step);
845 info->iv->ssa_name = iv;
848 /* Finds induction variable declaration for VAR. */
850 static struct iv *
851 get_iv (struct ivopts_data *data, tree var)
853 basic_block bb;
855 if (!name_info (data, var)->iv)
857 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
859 if (!bb
860 || !flow_bb_inside_loop_p (data->current_loop, bb))
861 set_iv (data, var, var, NULL_TREE);
864 return name_info (data, var)->iv;
867 /* Determines the step of a biv defined in PHI. */
869 static tree
870 determine_biv_step (tree phi)
872 struct loop *loop = bb_for_stmt (phi)->loop_father;
873 tree name = PHI_RESULT (phi), base, step;
874 tree type = TREE_TYPE (name);
876 if (!is_gimple_reg (name))
877 return NULL_TREE;
879 if (!simple_iv (loop, phi, name, &base, &step))
880 return NULL_TREE;
882 if (!step)
883 return build_int_cst (type, 0);
885 return step;
888 /* Returns true if EXP is an ssa name that occurs in an abnormal phi node. */
890 static bool
891 abnormal_ssa_name_p (tree exp)
893 if (!exp)
894 return false;
896 if (TREE_CODE (exp) != SSA_NAME)
897 return false;
899 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
902 /* Returns false if BASE or INDEX contains an ssa name that occurs in an
903 abnormal phi node. Callback for for_each_index. */
905 static bool
906 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
907 void *data ATTRIBUTE_UNUSED)
909 if (TREE_CODE (base) == ARRAY_REF)
911 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
912 return false;
913 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
914 return false;
917 return !abnormal_ssa_name_p (*index);
920 /* Returns true if EXPR contains an ssa name that occurs in an
921 abnormal phi node. */
923 static bool
924 contains_abnormal_ssa_name_p (tree expr)
926 enum tree_code code = TREE_CODE (expr);
927 enum tree_code_class class = TREE_CODE_CLASS (code);
929 if (code == SSA_NAME)
930 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
932 if (code == INTEGER_CST
933 || is_gimple_min_invariant (expr))
934 return false;
936 if (code == ADDR_EXPR)
937 return !for_each_index (&TREE_OPERAND (expr, 0),
938 idx_contains_abnormal_ssa_name_p,
939 NULL);
941 switch (class)
943 case tcc_binary:
944 case tcc_comparison:
945 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
946 return true;
948 /* Fallthru. */
949 case tcc_unary:
950 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
951 return true;
953 break;
955 default:
956 gcc_unreachable ();
959 return false;
962 /* Finds basic ivs. */
964 static bool
965 find_bivs (struct ivopts_data *data)
967 tree phi, step, type, base;
968 bool found = false;
969 struct loop *loop = data->current_loop;
971 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
973 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
974 continue;
976 step = determine_biv_step (phi);
978 if (!step)
979 continue;
980 if (cst_and_fits_in_hwi (step)
981 && int_cst_value (step) == 0)
982 continue;
984 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
985 if (contains_abnormal_ssa_name_p (base))
986 continue;
988 type = TREE_TYPE (PHI_RESULT (phi));
989 base = fold_convert (type, base);
990 step = fold_convert (type, step);
992 /* FIXME: We do not handle induction variables whose step does
993 not satisfy cst_and_fits_in_hwi. */
994 if (!cst_and_fits_in_hwi (step))
995 continue;
997 set_iv (data, PHI_RESULT (phi), base, step);
998 found = true;
1001 return found;
1004 /* Marks basic ivs. */
1006 static void
1007 mark_bivs (struct ivopts_data *data)
1009 tree phi, var;
1010 struct iv *iv, *incr_iv;
1011 struct loop *loop = data->current_loop;
1012 basic_block incr_bb;
1014 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1016 iv = get_iv (data, PHI_RESULT (phi));
1017 if (!iv)
1018 continue;
1020 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1021 incr_iv = get_iv (data, var);
1022 if (!incr_iv)
1023 continue;
1025 /* If the increment is in the subloop, ignore it. */
1026 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1027 if (incr_bb->loop_father != data->current_loop
1028 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1029 continue;
1031 iv->biv_p = true;
1032 incr_iv->biv_p = true;
1036 /* Checks whether STMT defines a linear induction variable and stores its
1037 parameters to BASE and STEP. */
1039 static bool
1040 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1041 tree *base, tree *step)
1043 tree lhs;
1044 struct loop *loop = data->current_loop;
1046 *base = NULL_TREE;
1047 *step = NULL_TREE;
1049 if (TREE_CODE (stmt) != MODIFY_EXPR)
1050 return false;
1052 lhs = TREE_OPERAND (stmt, 0);
1053 if (TREE_CODE (lhs) != SSA_NAME)
1054 return false;
1056 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
1057 return false;
1059 /* FIXME: We do not handle induction variables whose step does
1060 not satisfy cst_and_fits_in_hwi. */
1061 if (!zero_p (*step)
1062 && !cst_and_fits_in_hwi (*step))
1063 return false;
1065 if (contains_abnormal_ssa_name_p (*base))
1066 return false;
1068 return true;
1071 /* Finds general ivs in statement STMT. */
1073 static void
1074 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1076 tree base, step;
1078 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1079 return;
1081 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1084 /* Finds general ivs in basic block BB. */
1086 static void
1087 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1089 block_stmt_iterator bsi;
1091 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1092 find_givs_in_stmt (data, bsi_stmt (bsi));
1095 /* Finds general ivs. */
1097 static void
1098 find_givs (struct ivopts_data *data)
1100 struct loop *loop = data->current_loop;
1101 basic_block *body = get_loop_body_in_dom_order (loop);
1102 unsigned i;
1104 for (i = 0; i < loop->num_nodes; i++)
1105 find_givs_in_bb (data, body[i]);
1106 free (body);
1109 /* For each ssa name defined in LOOP, determine whether it is an induction
1110 variable and if so, its initial value and step. */
1112 static bool
1113 find_induction_variables (struct ivopts_data *data)
1115 unsigned i;
1116 bitmap_iterator bi;
1118 if (!find_bivs (data))
1119 return false;
1121 find_givs (data);
1122 mark_bivs (data);
1124 if (dump_file && (dump_flags & TDF_DETAILS))
1126 struct tree_niter_desc *niter;
1128 niter = niter_for_single_dom_exit (data);
1130 if (niter)
1132 fprintf (dump_file, " number of iterations ");
1133 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1134 fprintf (dump_file, "\n");
1136 fprintf (dump_file, " may be zero if ");
1137 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1138 fprintf (dump_file, "\n");
1139 fprintf (dump_file, "\n");
1142 fprintf (dump_file, "Induction variables:\n\n");
1144 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1146 if (ver_info (data, i)->iv)
1147 dump_iv (dump_file, ver_info (data, i)->iv);
1151 return true;
1154 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1156 static struct iv_use *
1157 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1158 tree stmt, enum use_type use_type)
1160 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1162 use->id = n_iv_uses (data);
1163 use->type = use_type;
1164 use->iv = iv;
1165 use->stmt = stmt;
1166 use->op_p = use_p;
1167 use->related_cands = BITMAP_ALLOC (NULL);
1169 /* To avoid showing the ssa name in the dumps, reset it here if it was
1170 not already reset by the caller. */
1171 iv->ssa_name = NULL_TREE;
1173 if (dump_file && (dump_flags & TDF_DETAILS))
1174 dump_use (dump_file, use);
1176 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1178 return use;
1181 /* Checks whether OP is a loop-level invariant and if so, records it.
1182 NONLINEAR_USE is true if the invariant is used in a way we do not
1183 handle specially. */
1185 static void
1186 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1188 basic_block bb;
1189 struct version_info *info;
1191 if (TREE_CODE (op) != SSA_NAME
1192 || !is_gimple_reg (op))
1193 return;
1195 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1196 if (bb
1197 && flow_bb_inside_loop_p (data->current_loop, bb))
1198 return;
1200 info = name_info (data, op);
1201 info->name = op;
1202 info->has_nonlin_use |= nonlinear_use;
1203 if (!info->inv_id)
1204 info->inv_id = ++data->max_inv_id;
1205 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1208 /* Checks whether the use OP is interesting and if so, records it
1209 as TYPE. */
1211 static struct iv_use *
1212 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1213 enum use_type type)
1215 struct iv *iv;
1216 struct iv *civ;
1217 tree stmt;
1218 struct iv_use *use;
1220 if (TREE_CODE (op) != SSA_NAME)
1221 return NULL;
1223 iv = get_iv (data, op);
1224 if (!iv)
1225 return NULL;
1227 if (iv->have_use_for)
1229 use = iv_use (data, iv->use_id);
1231 gcc_assert (use->type == USE_NONLINEAR_EXPR
1232 || use->type == USE_OUTER);
1234 if (type == USE_NONLINEAR_EXPR)
1235 use->type = USE_NONLINEAR_EXPR;
1236 return use;
1239 if (zero_p (iv->step))
1241 record_invariant (data, op, true);
1242 return NULL;
1244 iv->have_use_for = true;
1246 civ = xmalloc (sizeof (struct iv));
1247 *civ = *iv;
1249 stmt = SSA_NAME_DEF_STMT (op);
1250 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1251 || TREE_CODE (stmt) == MODIFY_EXPR);
1253 use = record_use (data, NULL, civ, stmt, type);
1254 iv->use_id = use->id;
1256 return use;
1259 /* Checks whether the use OP is interesting and if so, records it. */
1261 static struct iv_use *
1262 find_interesting_uses_op (struct ivopts_data *data, tree op)
1264 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1267 /* Records a definition of induction variable OP that is used outside of the
1268 loop. */
1270 static struct iv_use *
1271 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1273 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1276 /* Checks whether the condition *COND_P in STMT is interesting
1277 and if so, records it. */
1279 static void
1280 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1282 tree *op0_p;
1283 tree *op1_p;
1284 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1285 struct iv const_iv;
1286 tree zero = integer_zero_node;
1288 const_iv.step = NULL_TREE;
1290 if (TREE_CODE (*cond_p) != SSA_NAME
1291 && !COMPARISON_CLASS_P (*cond_p))
1292 return;
1294 if (TREE_CODE (*cond_p) == SSA_NAME)
1296 op0_p = cond_p;
1297 op1_p = &zero;
1299 else
1301 op0_p = &TREE_OPERAND (*cond_p, 0);
1302 op1_p = &TREE_OPERAND (*cond_p, 1);
1305 if (TREE_CODE (*op0_p) == SSA_NAME)
1306 iv0 = get_iv (data, *op0_p);
1307 else
1308 iv0 = &const_iv;
1310 if (TREE_CODE (*op1_p) == SSA_NAME)
1311 iv1 = get_iv (data, *op1_p);
1312 else
1313 iv1 = &const_iv;
1315 if (/* When comparing with a non-invariant value, we cannot do any sensible
1316 induction variable elimination. */
1317 (!iv0 || !iv1)
1318 /* Eliminating condition based on two ivs would be nontrivial.
1319 ??? TODO -- it is not really important to handle this case. */
1320 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1322 find_interesting_uses_op (data, *op0_p);
1323 find_interesting_uses_op (data, *op1_p);
1324 return;
1327 if (zero_p (iv0->step) && zero_p (iv1->step))
1329 /* If both are invariants, this is a work for unswitching. */
1330 return;
1333 civ = xmalloc (sizeof (struct iv));
1334 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1335 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1338 /* Returns true if expression EXPR is obviously invariant in LOOP,
1339 i.e. if all its operands are defined outside of the LOOP. */
1341 bool
1342 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1344 basic_block def_bb;
1345 unsigned i, len;
1347 if (is_gimple_min_invariant (expr))
1348 return true;
1350 if (TREE_CODE (expr) == SSA_NAME)
1352 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1353 if (def_bb
1354 && flow_bb_inside_loop_p (loop, def_bb))
1355 return false;
1357 return true;
1360 if (!EXPR_P (expr))
1361 return false;
1363 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1364 for (i = 0; i < len; i++)
1365 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1366 return false;
1368 return true;
1371 /* Accumulates the steps of indices into DATA and replaces their values with the
1372 initial ones. Returns false when the value of the index cannot be determined.
1373 Callback for for_each_index. */
1375 struct ifs_ivopts_data
1377 struct ivopts_data *ivopts_data;
1378 tree stmt;
1379 tree *step_p;
1382 static bool
1383 idx_find_step (tree base, tree *idx, void *data)
1385 struct ifs_ivopts_data *dta = data;
1386 struct iv *iv;
1387 tree step, type, iv_type, iv_step, lbound, off;
1388 struct loop *loop = dta->ivopts_data->current_loop;
1390 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1391 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1392 return false;
1394 /* If base is a component ref, require that the offset of the reference
1395 be invariant. */
1396 if (TREE_CODE (base) == COMPONENT_REF)
1398 off = component_ref_field_offset (base);
1399 return expr_invariant_in_loop_p (loop, off);
1402 /* If base is an array, first check whether we will be able to move the
1403 reference out of the loop (in order to take its address in strength
1404 reduction). In order for this to work we need both lower bound
1405 and step to be loop invariants. */
1406 if (TREE_CODE (base) == ARRAY_REF)
1408 step = array_ref_element_size (base);
1409 lbound = array_ref_low_bound (base);
1411 if (!expr_invariant_in_loop_p (loop, step)
1412 || !expr_invariant_in_loop_p (loop, lbound))
1413 return false;
1416 if (TREE_CODE (*idx) != SSA_NAME)
1417 return true;
1419 iv = get_iv (dta->ivopts_data, *idx);
1420 if (!iv)
1421 return false;
1423 *idx = iv->base;
1425 if (!iv->step)
1426 return true;
1428 iv_type = TREE_TYPE (iv->base);
1429 type = build_pointer_type (TREE_TYPE (base));
1430 if (TREE_CODE (base) == ARRAY_REF)
1432 step = array_ref_element_size (base);
1434 /* We only handle addresses whose step is an integer constant. */
1435 if (TREE_CODE (step) != INTEGER_CST)
1436 return false;
1438 else
1439 /* The step for pointer arithmetic is already 1 byte. */
1440 step = build_int_cst (type, 1);
1442 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1443 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1444 type, iv->base, iv->step, dta->stmt);
1445 else
1446 iv_step = fold_convert (iv_type, iv->step);
1448 if (!iv_step)
1450 /* The index might wrap. */
1451 return false;
1454 step = fold_binary_to_constant (MULT_EXPR, type, step, iv_step);
1456 if (!*dta->step_p)
1457 *dta->step_p = step;
1458 else
1459 *dta->step_p = fold_binary_to_constant (PLUS_EXPR, type,
1460 *dta->step_p, step);
1462 return true;
1465 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1466 object is passed to it in DATA. */
1468 static bool
1469 idx_record_use (tree base, tree *idx,
1470 void *data)
1472 find_interesting_uses_op (data, *idx);
1473 if (TREE_CODE (base) == ARRAY_REF)
1475 find_interesting_uses_op (data, array_ref_element_size (base));
1476 find_interesting_uses_op (data, array_ref_low_bound (base));
1478 return true;
1481 /* Returns true if memory reference REF may be unaligned. */
1483 static bool
1484 may_be_unaligned_p (tree ref)
1486 tree base;
1487 tree base_type;
1488 HOST_WIDE_INT bitsize;
1489 HOST_WIDE_INT bitpos;
1490 tree toffset;
1491 enum machine_mode mode;
1492 int unsignedp, volatilep;
1493 unsigned base_align;
1495 /* The test below is basically a copy of what expr.c:normal_inner_ref
1496 does to check whether the object must be loaded by parts when
1497 STRICT_ALIGNMENT is true. */
1498 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1499 &unsignedp, &volatilep, true);
1500 base_type = TREE_TYPE (base);
1501 base_align = TYPE_ALIGN (base_type);
1503 if (mode != BLKmode
1504 && (base_align < GET_MODE_ALIGNMENT (mode)
1505 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1506 || bitpos % BITS_PER_UNIT != 0))
1507 return true;
1509 return false;
1512 /* Finds addresses in *OP_P inside STMT. */
1514 static void
1515 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1517 tree base = unshare_expr (*op_p), step = NULL;
1518 struct iv *civ;
1519 struct ifs_ivopts_data ifs_ivopts_data;
1521 /* Ignore bitfields for now. Not really something terribly complicated
1522 to handle. TODO. */
1523 if (TREE_CODE (base) == COMPONENT_REF
1524 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1525 goto fail;
1527 if (STRICT_ALIGNMENT
1528 && may_be_unaligned_p (base))
1529 goto fail;
1531 ifs_ivopts_data.ivopts_data = data;
1532 ifs_ivopts_data.stmt = stmt;
1533 ifs_ivopts_data.step_p = &step;
1534 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1535 || zero_p (step))
1536 goto fail;
1538 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1539 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1541 if (TREE_CODE (base) == INDIRECT_REF)
1542 base = TREE_OPERAND (base, 0);
1543 else
1544 base = build_addr (base);
1546 civ = alloc_iv (base, step);
1547 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1548 return;
1550 fail:
1551 for_each_index (op_p, idx_record_use, data);
1554 /* Finds and records invariants used in STMT. */
1556 static void
1557 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1559 use_optype uses = NULL;
1560 unsigned i, n;
1561 tree op;
1563 if (TREE_CODE (stmt) == PHI_NODE)
1564 n = PHI_NUM_ARGS (stmt);
1565 else
1567 uses = STMT_USE_OPS (stmt);
1568 n = NUM_USES (uses);
1571 for (i = 0; i < n; i++)
1573 if (TREE_CODE (stmt) == PHI_NODE)
1574 op = PHI_ARG_DEF (stmt, i);
1575 else
1576 op = USE_OP (uses, i);
1578 record_invariant (data, op, false);
1582 /* Finds interesting uses of induction variables in the statement STMT. */
1584 static void
1585 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1587 struct iv *iv;
1588 tree op, lhs, rhs;
1589 use_optype uses = NULL;
1590 unsigned i, n;
1592 find_invariants_stmt (data, stmt);
1594 if (TREE_CODE (stmt) == COND_EXPR)
1596 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1597 return;
1600 if (TREE_CODE (stmt) == MODIFY_EXPR)
1602 lhs = TREE_OPERAND (stmt, 0);
1603 rhs = TREE_OPERAND (stmt, 1);
1605 if (TREE_CODE (lhs) == SSA_NAME)
1607 /* If the statement defines an induction variable, the uses are not
1608 interesting by themselves. */
1610 iv = get_iv (data, lhs);
1612 if (iv && !zero_p (iv->step))
1613 return;
1616 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1618 case tcc_comparison:
1619 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1620 return;
1622 case tcc_reference:
1623 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1624 if (REFERENCE_CLASS_P (lhs))
1625 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1626 return;
1628 default: ;
1631 if (REFERENCE_CLASS_P (lhs)
1632 && is_gimple_val (rhs))
1634 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1635 find_interesting_uses_op (data, rhs);
1636 return;
1639 /* TODO -- we should also handle address uses of type
1641 memory = call (whatever);
1645 call (memory). */
1648 if (TREE_CODE (stmt) == PHI_NODE
1649 && bb_for_stmt (stmt) == data->current_loop->header)
1651 lhs = PHI_RESULT (stmt);
1652 iv = get_iv (data, lhs);
1654 if (iv && !zero_p (iv->step))
1655 return;
1658 if (TREE_CODE (stmt) == PHI_NODE)
1659 n = PHI_NUM_ARGS (stmt);
1660 else
1662 uses = STMT_USE_OPS (stmt);
1663 n = NUM_USES (uses);
1666 for (i = 0; i < n; i++)
1668 if (TREE_CODE (stmt) == PHI_NODE)
1669 op = PHI_ARG_DEF (stmt, i);
1670 else
1671 op = USE_OP (uses, i);
1673 if (TREE_CODE (op) != SSA_NAME)
1674 continue;
1676 iv = get_iv (data, op);
1677 if (!iv)
1678 continue;
1680 find_interesting_uses_op (data, op);
1684 /* Finds interesting uses of induction variables outside of loops
1685 on loop exit edge EXIT. */
1687 static void
1688 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1690 tree phi, def;
1692 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1694 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1695 find_interesting_uses_outer (data, def);
1699 /* Finds uses of the induction variables that are interesting. */
1701 static void
1702 find_interesting_uses (struct ivopts_data *data)
1704 basic_block bb;
1705 block_stmt_iterator bsi;
1706 tree phi;
1707 basic_block *body = get_loop_body (data->current_loop);
1708 unsigned i;
1709 struct version_info *info;
1710 edge e;
1712 if (dump_file && (dump_flags & TDF_DETAILS))
1713 fprintf (dump_file, "Uses:\n\n");
1715 for (i = 0; i < data->current_loop->num_nodes; i++)
1717 edge_iterator ei;
1718 bb = body[i];
1720 FOR_EACH_EDGE (e, ei, bb->succs)
1721 if (e->dest != EXIT_BLOCK_PTR
1722 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1723 find_interesting_uses_outside (data, e);
1725 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1726 find_interesting_uses_stmt (data, phi);
1727 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1728 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1731 if (dump_file && (dump_flags & TDF_DETAILS))
1733 bitmap_iterator bi;
1735 fprintf (dump_file, "\n");
1737 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1739 info = ver_info (data, i);
1740 if (info->inv_id)
1742 fprintf (dump_file, " ");
1743 print_generic_expr (dump_file, info->name, TDF_SLIM);
1744 fprintf (dump_file, " is invariant (%d)%s\n",
1745 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1749 fprintf (dump_file, "\n");
1752 free (body);
1755 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1756 is true, assume we are inside an address. */
1758 static tree
1759 strip_offset (tree expr, bool inside_addr, unsigned HOST_WIDE_INT *offset)
1761 tree op0 = NULL_TREE, op1 = NULL_TREE, step;
1762 enum tree_code code;
1763 tree type, orig_type = TREE_TYPE (expr);
1764 unsigned HOST_WIDE_INT off0, off1, st;
1765 tree orig_expr = expr;
1767 STRIP_NOPS (expr);
1768 type = TREE_TYPE (expr);
1769 code = TREE_CODE (expr);
1770 *offset = 0;
1772 switch (code)
1774 case INTEGER_CST:
1775 if (!cst_and_fits_in_hwi (expr)
1776 || zero_p (expr))
1777 return orig_expr;
1779 *offset = int_cst_value (expr);
1780 return build_int_cst_type (orig_type, 0);
1782 case PLUS_EXPR:
1783 case MINUS_EXPR:
1784 op0 = TREE_OPERAND (expr, 0);
1785 op1 = TREE_OPERAND (expr, 1);
1787 op0 = strip_offset (op0, false, &off0);
1788 op1 = strip_offset (op1, false, &off1);
1790 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1791 if (op0 == TREE_OPERAND (expr, 0)
1792 && op1 == TREE_OPERAND (expr, 1))
1793 return orig_expr;
1795 if (zero_p (op1))
1796 expr = op0;
1797 else if (zero_p (op0))
1799 if (code == PLUS_EXPR)
1800 expr = op1;
1801 else
1802 expr = build1 (NEGATE_EXPR, type, op1);
1804 else
1805 expr = build2 (code, type, op0, op1);
1807 return fold_convert (orig_type, expr);
1809 case ARRAY_REF:
1810 if (!inside_addr)
1811 return orig_expr;
1813 step = array_ref_element_size (expr);
1814 if (!cst_and_fits_in_hwi (step))
1815 break;
1817 st = int_cst_value (step);
1818 op1 = TREE_OPERAND (expr, 1);
1819 op1 = strip_offset (op1, false, &off1);
1820 *offset = off1 * st;
1821 break;
1823 case COMPONENT_REF:
1824 if (!inside_addr)
1825 return orig_expr;
1826 break;
1828 case ADDR_EXPR:
1829 inside_addr = true;
1830 break;
1832 default:
1833 return orig_expr;
1836 /* Default handling of expressions for which we want to recurse into
1837 the first operand. */
1838 op0 = TREE_OPERAND (expr, 0);
1839 op0 = strip_offset (op0, inside_addr, &off0);
1840 *offset += off0;
1842 if (op0 == TREE_OPERAND (expr, 0)
1843 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1844 return orig_expr;
1846 expr = copy_node (expr);
1847 TREE_OPERAND (expr, 0) = op0;
1848 if (op1)
1849 TREE_OPERAND (expr, 1) = op1;
1851 return fold_convert (orig_type, expr);
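/* An illustrative example of the function above (the types involved are an
   assumption for the example, not taken from the source): applied to the
   address &a[i + 1], where the array elements are 4 bytes wide, the
   ARRAY_REF case strips the constant 1 from the index and scales it by the
   element size, so OFFSET becomes 4 and the returned expression is
   essentially &a[i].  */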
1854 /* Returns variant of TYPE that can be used as base for different uses.
1855 For integer types, we return unsigned variant of the type, which
1856 avoids problems with overflows. For pointer types, we return void *. */
1858 static tree
1859 generic_type_for (tree type)
1861 if (POINTER_TYPE_P (type))
1862 return ptr_type_node;
1864 if (TYPE_UNSIGNED (type))
1865 return type;
1867 return unsigned_type_for (type);
1870 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1871 position to POS. If USE is not NULL, the candidate is set as related to
1872 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1873 replacement of the final value of the iv by a direct computation. */
1875 static struct iv_cand *
1876 add_candidate_1 (struct ivopts_data *data,
1877 tree base, tree step, bool important, enum iv_position pos,
1878 struct iv_use *use, tree incremented_at)
1880 unsigned i;
1881 struct iv_cand *cand = NULL;
1882 tree type, orig_type;
1884 if (base)
1886 orig_type = TREE_TYPE (base);
1887 type = generic_type_for (orig_type);
1888 if (type != orig_type)
1890 base = fold_convert (type, base);
1891 if (step)
1892 step = fold_convert (type, step);
1896 for (i = 0; i < n_iv_cands (data); i++)
1898 cand = iv_cand (data, i);
1900 if (cand->pos != pos)
1901 continue;
1903 if (cand->incremented_at != incremented_at)
1904 continue;
1906 if (!cand->iv)
1908 if (!base && !step)
1909 break;
1911 continue;
1914 if (!base && !step)
1915 continue;
1917 if (!operand_equal_p (base, cand->iv->base, 0))
1918 continue;
1920 if (zero_p (cand->iv->step))
1922 if (zero_p (step))
1923 break;
1925 else
1927 if (step && operand_equal_p (step, cand->iv->step, 0))
1928 break;
1932 if (i == n_iv_cands (data))
1934 cand = xcalloc (1, sizeof (struct iv_cand));
1935 cand->id = i;
1937 if (!base && !step)
1938 cand->iv = NULL;
1939 else
1940 cand->iv = alloc_iv (base, step);
1942 cand->pos = pos;
1943 if (pos != IP_ORIGINAL && cand->iv)
1945 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1946 cand->var_after = cand->var_before;
1948 cand->important = important;
1949 cand->incremented_at = incremented_at;
1950 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
1952 if (dump_file && (dump_flags & TDF_DETAILS))
1953 dump_cand (dump_file, cand);
1956 if (important && !cand->important)
1958 cand->important = true;
1959 if (dump_file && (dump_flags & TDF_DETAILS))
1960 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1963 if (use)
1965 bitmap_set_bit (use->related_cands, i);
1966 if (dump_file && (dump_flags & TDF_DETAILS))
1967 fprintf (dump_file, "Candidate %d is related to use %d\n",
1968 cand->id, use->id);
1971 return cand;
1974 /* Returns true if incrementing the induction variable at the end of the LOOP
1975 is allowed.
1977 The purpose is to avoid splitting the latch edge with a biv increment, thus
1978 creating a jump, possibly confusing other optimization passes and leaving
1979 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
1980 is not available (so we do not have a better alternative), or if the latch
1981 edge is already nonempty. */
1983 static bool
1984 allow_ip_end_pos_p (struct loop *loop)
1986 if (!ip_normal_pos (loop))
1987 return true;
1989 if (!empty_block_p (ip_end_pos (loop)))
1990 return true;
1992 return false;
1995 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1996 position to POS. If USE is not NULL, the candidate is set as related to
1997 it. The candidate computation is scheduled on all available positions. */
1999 static void
2000 add_candidate (struct ivopts_data *data,
2001 tree base, tree step, bool important, struct iv_use *use)
2003 if (ip_normal_pos (data->current_loop))
2004 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2005 if (ip_end_pos (data->current_loop)
2006 && allow_ip_end_pos_p (data->current_loop))
2007 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2010 /* Add a standard "0 + 1 * iteration" iv candidate for a
2011 type with SIZE bits. */
2013 static void
2014 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2015 unsigned int size)
2017 tree type = lang_hooks.types.type_for_size (size, true);
2018 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2019 true, NULL);
2022 /* Adds standard iv candidates. */
2024 static void
2025 add_standard_iv_candidates (struct ivopts_data *data)
2027 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2029 /* The same for a double-integer type if it is still fast enough. */
2030 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2031 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2035 /* Adds candidates based on the old induction variable IV. */
2037 static void
2038 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2040 tree phi, def;
2041 struct iv_cand *cand;
2043 add_candidate (data, iv->base, iv->step, true, NULL);
2045 /* The same, but with initial value zero. */
2046 add_candidate (data,
2047 build_int_cst (TREE_TYPE (iv->base), 0),
2048 iv->step, true, NULL);
2050 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2051 if (TREE_CODE (phi) == PHI_NODE)
2053 /* Additionally record the possibility of leaving the original iv
2054 untouched. */
2055 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2056 cand = add_candidate_1 (data,
2057 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2058 SSA_NAME_DEF_STMT (def));
2059 cand->var_before = iv->ssa_name;
2060 cand->var_after = def;
2064 /* Adds candidates based on the old induction variables. */
2066 static void
2067 add_old_ivs_candidates (struct ivopts_data *data)
2069 unsigned i;
2070 struct iv *iv;
2071 bitmap_iterator bi;
2073 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2075 iv = ver_info (data, i)->iv;
2076 if (iv && iv->biv_p && !zero_p (iv->step))
2077 add_old_iv_candidates (data, iv);
2081 /* Adds candidates based on the value of the induction variable IV and USE. */
2083 static void
2084 add_iv_value_candidates (struct ivopts_data *data,
2085 struct iv *iv, struct iv_use *use)
2087 add_candidate (data, iv->base, iv->step, false, use);
2089 /* The same, but with initial value zero. */
2090 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2091 iv->step, false, use);
2094 /* Adds candidates based on the address IV and USE. */
2096 static void
2097 add_address_candidates (struct ivopts_data *data,
2098 struct iv *iv, struct iv_use *use)
2100 tree base, abase;
2101 unsigned HOST_WIDE_INT offset;
2103 /* First, the trivial choices. */
2104 add_iv_value_candidates (data, iv, use);
2106 /* Second, try removing the COMPONENT_REFs. */
2107 if (TREE_CODE (iv->base) == ADDR_EXPR)
2109 base = TREE_OPERAND (iv->base, 0);
2110 while (TREE_CODE (base) == COMPONENT_REF
2111 || (TREE_CODE (base) == ARRAY_REF
2112 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
2113 base = TREE_OPERAND (base, 0);
2115 if (base != TREE_OPERAND (iv->base, 0))
2117 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
2118 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
2120 if (TREE_CODE (base) == INDIRECT_REF)
2121 base = TREE_OPERAND (base, 0);
2122 else
2123 base = build_addr (base);
2124 add_candidate (data, base, iv->step, false, use);
2128 /* Third, try removing the constant offset. */
2129 abase = iv->base;
2130 base = strip_offset (abase, false, &offset);
2131 if (offset)
2132 add_candidate (data, base, iv->step, false, use);
2135 /* Possibly adds pseudocandidate for replacing the final value of USE by
2136 a direct computation. */
2138 static void
2139 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2141 struct tree_niter_desc *niter;
2143 /* We must know where we exit the loop and how many times it rolls. */
2144 niter = niter_for_single_dom_exit (data);
2145 if (!niter
2146 || !zero_p (niter->may_be_zero))
2147 return;
2149 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2152 /* Adds candidates based on the uses. */
2154 static void
2155 add_derived_ivs_candidates (struct ivopts_data *data)
2157 unsigned i;
2159 for (i = 0; i < n_iv_uses (data); i++)
2161 struct iv_use *use = iv_use (data, i);
2163 if (!use)
2164 continue;
2166 switch (use->type)
2168 case USE_NONLINEAR_EXPR:
2169 case USE_COMPARE:
2170 /* Just add the ivs based on the value of the iv used here. */
2171 add_iv_value_candidates (data, use->iv, use);
2172 break;
2174 case USE_OUTER:
2175 add_iv_value_candidates (data, use->iv, use);
2177 /* Additionally, add the pseudocandidate for the possibility to
2178 replace the final value by a direct computation. */
2179 add_iv_outer_candidates (data, use);
2180 break;
2182 case USE_ADDRESS:
2183 add_address_candidates (data, use->iv, use);
2184 break;
2186 default:
2187 gcc_unreachable ();
2192 /* Record important candidates and add them to related_cands bitmaps
2193 if needed. */
2195 static void
2196 record_important_candidates (struct ivopts_data *data)
2198 unsigned i;
2199 struct iv_use *use;
2201 for (i = 0; i < n_iv_cands (data); i++)
2203 struct iv_cand *cand = iv_cand (data, i);
2205 if (cand->important)
2206 bitmap_set_bit (data->important_candidates, i);
2209 data->consider_all_candidates = (n_iv_cands (data)
2210 <= CONSIDER_ALL_CANDIDATES_BOUND);
2212 if (data->consider_all_candidates)
2214 /* We will not need "related_cands" bitmaps in this case,
2215 so release them to decrease peak memory consumption. */
2216 for (i = 0; i < n_iv_uses (data); i++)
2218 use = iv_use (data, i);
2219 BITMAP_FREE (use->related_cands);
2222 else
2224 /* Add important candidates to the related_cands bitmaps. */
2225 for (i = 0; i < n_iv_uses (data); i++)
2226 bitmap_ior_into (iv_use (data, i)->related_cands,
2227 data->important_candidates);
2231 /* Finds the candidates for the induction variables. */
2233 static void
2234 find_iv_candidates (struct ivopts_data *data)
2236 /* Add commonly used ivs. */
2237 add_standard_iv_candidates (data);
2239 /* Add old induction variables. */
2240 add_old_ivs_candidates (data);
2242 /* Add induction variables derived from uses. */
2243 add_derived_ivs_candidates (data);
2245 /* Record the important candidates. */
2246 record_important_candidates (data);
2249 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2250 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2251 we allocate a simple list for every use. */
2253 static void
2254 alloc_use_cost_map (struct ivopts_data *data)
2256 unsigned i, size, s, j;
2258 for (i = 0; i < n_iv_uses (data); i++)
2260 struct iv_use *use = iv_use (data, i);
2261 bitmap_iterator bi;
2263 if (data->consider_all_candidates)
2264 size = n_iv_cands (data);
2265 else
2267 s = 0;
2268 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2270 s++;
2273 /* Round up to a power of two, so that the modulo operation is fast. */
2274 for (size = 1; size < s; size <<= 1)
2275 continue;
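	  /* For instance, a use with five related candidates gets a map of
	     eight entries, so that the "cand->id & (n_map_members - 1)"
	     hashing used below reduces to a cheap bit mask.  */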
2278 use->n_map_members = size;
2279 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2283 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2284 on invariants DEPENDS_ON and that the value used in expressing it
2285 is VALUE. */
2287 static void
2288 set_use_iv_cost (struct ivopts_data *data,
2289 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2290 bitmap depends_on, tree value)
2292 unsigned i, s;
2294 if (cost == INFTY)
2296 BITMAP_FREE (depends_on);
2297 return;
2300 if (data->consider_all_candidates)
2302 use->cost_map[cand->id].cand = cand;
2303 use->cost_map[cand->id].cost = cost;
2304 use->cost_map[cand->id].depends_on = depends_on;
2305 use->cost_map[cand->id].value = value;
2306 return;
2309 /* n_map_members is a power of two, so this computes modulo. */
2310 s = cand->id & (use->n_map_members - 1);
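  /* E.g. with n_map_members == 8, a candidate with id 13 starts probing at
     slot 13 & 7 == 5 and scans forward (wrapping around) for a free slot.  */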
2311 for (i = s; i < use->n_map_members; i++)
2312 if (!use->cost_map[i].cand)
2313 goto found;
2314 for (i = 0; i < s; i++)
2315 if (!use->cost_map[i].cand)
2316 goto found;
2318 gcc_unreachable ();
2320 found:
2321 use->cost_map[i].cand = cand;
2322 use->cost_map[i].cost = cost;
2323 use->cost_map[i].depends_on = depends_on;
2324 use->cost_map[i].value = value;
2327 /* Gets cost of (USE, CANDIDATE) pair. */
2329 static struct cost_pair *
2330 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2331 struct iv_cand *cand)
2333 unsigned i, s;
2334 struct cost_pair *ret;
2336 if (!cand)
2337 return NULL;
2339 if (data->consider_all_candidates)
2341 ret = use->cost_map + cand->id;
2342 if (!ret->cand)
2343 return NULL;
2345 return ret;
2348 /* n_map_members is a power of two, so this computes modulo. */
2349 s = cand->id & (use->n_map_members - 1);
2350 for (i = s; i < use->n_map_members; i++)
2351 if (use->cost_map[i].cand == cand)
2352 return use->cost_map + i;
2354 for (i = 0; i < s; i++)
2355 if (use->cost_map[i].cand == cand)
2356 return use->cost_map + i;
2358 return NULL;
2361 /* Returns an estimate of the cost of computing SEQ. */
2363 static unsigned
2364 seq_cost (rtx seq)
2366 unsigned cost = 0;
2367 rtx set;
2369 for (; seq; seq = NEXT_INSN (seq))
2371 set = single_set (seq);
2372 if (set)
2373 cost += rtx_cost (set, SET);
2374 else
2375 cost++;
2378 return cost;
2381 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory. */
2382 static rtx
2383 produce_memory_decl_rtl (tree obj, int *regno)
2385 rtx x;
2387 gcc_assert (obj);
2388 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2390 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2391 x = gen_rtx_SYMBOL_REF (Pmode, name);
2393 else
2394 x = gen_raw_REG (Pmode, (*regno)++);
2396 return gen_rtx_MEM (DECL_MODE (obj), x);
2399 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2400 walk_tree. DATA contains the actual fake register number. */
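/* For instance, for an expression such as a_5 + 16 (a_5 standing for any SSA
   name), the variable underlying a_5 is given a fresh fake pseudo register as
   its DECL_RTL, so that computation_cost below can expand the expression to
   RTL and measure its cost without disturbing the function's real registers.  */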
2402 static tree
2403 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2405 tree obj = NULL_TREE;
2406 rtx x = NULL_RTX;
2407 int *regno = data;
2409 switch (TREE_CODE (*expr_p))
2411 case ADDR_EXPR:
2412 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2413 handled_component_p (*expr_p);
2414 expr_p = &TREE_OPERAND (*expr_p, 0))
2415 continue;
2416 obj = *expr_p;
2417 if (DECL_P (obj))
2418 x = produce_memory_decl_rtl (obj, regno);
2419 break;
2421 case SSA_NAME:
2422 *ws = 0;
2423 obj = SSA_NAME_VAR (*expr_p);
2424 if (!DECL_RTL_SET_P (obj))
2425 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2426 break;
2428 case VAR_DECL:
2429 case PARM_DECL:
2430 case RESULT_DECL:
2431 *ws = 0;
2432 obj = *expr_p;
2434 if (DECL_RTL_SET_P (obj))
2435 break;
2437 if (DECL_MODE (obj) == BLKmode)
2438 x = produce_memory_decl_rtl (obj, regno);
2439 else
2440 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2442 break;
2444 default:
2445 break;
2448 if (x)
2450 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2451 SET_DECL_RTL (obj, x);
2454 return NULL_TREE;
2457 /* Determines cost of the computation of EXPR. */
2459 static unsigned
2460 computation_cost (tree expr)
2462 rtx seq, rslt;
2463 tree type = TREE_TYPE (expr);
2464 unsigned cost;
2465 /* Avoid using hard regs in ways which may be unsupported. */
2466 int regno = LAST_VIRTUAL_REGISTER + 1;
2468 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2469 start_sequence ();
2470 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2471 seq = get_insns ();
2472 end_sequence ();
2474 cost = seq_cost (seq);
2475 if (GET_CODE (rslt) == MEM)
2476 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2478 return cost;
2481 /* Returns variable containing the value of candidate CAND at statement STMT. */
2483 static tree
2484 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2486 if (stmt_after_increment (loop, cand, stmt))
2487 return cand->var_after;
2488 else
2489 return cand->var_before;
2492 /* Determines the expression by which USE is expressed from induction variable
2493 CAND at statement AT in LOOP. */
2495 static tree
2496 get_computation_at (struct loop *loop,
2497 struct iv_use *use, struct iv_cand *cand, tree at)
2499 tree ubase = use->iv->base;
2500 tree ustep = use->iv->step;
2501 tree cbase = cand->iv->base;
2502 tree cstep = cand->iv->step;
2503 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2504 tree uutype;
2505 tree expr, delta;
2506 tree ratio;
2507 unsigned HOST_WIDE_INT ustepi, cstepi;
2508 HOST_WIDE_INT ratioi;
2510 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2512 /* We do not have enough precision to express the values of the use. */
2513 return NULL_TREE;
2516 expr = var_at_stmt (loop, cand, at);
2518 if (TREE_TYPE (expr) != ctype)
2520 /* This may happen with the original ivs. */
2521 expr = fold_convert (ctype, expr);
2524 if (TYPE_UNSIGNED (utype))
2525 uutype = utype;
2526 else
2528 uutype = unsigned_type_for (utype);
2529 ubase = fold_convert (uutype, ubase);
2530 ustep = fold_convert (uutype, ustep);
2533 if (uutype != ctype)
2535 expr = fold_convert (uutype, expr);
2536 cbase = fold_convert (uutype, cbase);
2537 cstep = fold_convert (uutype, cstep);
2540 if (!cst_and_fits_in_hwi (cstep)
2541 || !cst_and_fits_in_hwi (ustep))
2542 return NULL_TREE;
2544 ustepi = int_cst_value (ustep);
2545 cstepi = int_cst_value (cstep);
2547 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2549 /* TODO: maybe consider the case when ustep divides cstep and the ratio is
2550 a power of 2 (so that the division is fast to execute)? We would
2551 need to be much more careful with overflows etc. in that case. */
2552 return NULL_TREE;
2555 /* We may need to shift the value if we are after the increment. */
2556 if (stmt_after_increment (loop, cand, at))
2557 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2559 /* use = ubase - ratio * cbase + ratio * var.
2561 In the general case ubase + ratio * (var - cbase) could be better (one less
2562 multiplication), but often it is possible to eliminate redundant parts
2563 of computations from (ubase - ratio * cbase) term, and if it does not
2564 happen, fold is able to apply the distributive law to obtain this form
2565 anyway. */
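  /* As an illustration (with made-up values): for a use with base &a[0] and
     step 4 based on a candidate with base 0 and step 1, the ratio is 4 and the
     use is expressed as &a[0] - 4 * 0 + 4 * var, i.e. &a[0] + 4 * var.  */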
2567 if (ratioi == 1)
2569 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2570 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2572 else if (ratioi == -1)
2574 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2575 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2577 else
2579 ratio = build_int_cst_type (uutype, ratioi);
2580 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2581 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2582 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2583 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2586 return fold_convert (utype, expr);
2589 /* Determines the expression by which USE is expressed from induction variable
2590 CAND in LOOP. */
2592 static tree
2593 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2595 return get_computation_at (loop, use, cand, use->stmt);
2598 /* Returns cost of addition in MODE. */
2600 static unsigned
2601 add_cost (enum machine_mode mode)
2603 static unsigned costs[NUM_MACHINE_MODES];
2604 rtx seq;
2605 unsigned cost;
2607 if (costs[mode])
2608 return costs[mode];
2610 start_sequence ();
2611 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2612 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2613 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2614 NULL_RTX);
2615 seq = get_insns ();
2616 end_sequence ();
2618 cost = seq_cost (seq);
2619 if (!cost)
2620 cost = 1;
2622 costs[mode] = cost;
2624 if (dump_file && (dump_flags & TDF_DETAILS))
2625 fprintf (dump_file, "Addition in %s costs %d\n",
2626 GET_MODE_NAME (mode), cost);
2627 return cost;
2630 /* Entry in a hashtable of already known costs for multiplication. */
2631 struct mbc_entry
2633 HOST_WIDE_INT cst; /* The constant to multiply by. */
2634 enum machine_mode mode; /* In mode. */
2635 unsigned cost; /* The cost. */
2638 /* Computes the hash value for ENTRY. */
2640 static hashval_t
2641 mbc_entry_hash (const void *entry)
2643 const struct mbc_entry *e = entry;
2645 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2648 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2650 static int
2651 mbc_entry_eq (const void *entry1, const void *entry2)
2653 const struct mbc_entry *e1 = entry1;
2654 const struct mbc_entry *e2 = entry2;
2656 return (e1->mode == e2->mode
2657 && e1->cst == e2->cst);
2660 /* Returns cost of multiplication by constant CST in MODE. */
2662 static unsigned
2663 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2665 static htab_t costs;
2666 struct mbc_entry **cached, act;
2667 rtx seq;
2668 unsigned cost;
2670 if (!costs)
2671 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2673 act.mode = mode;
2674 act.cst = cst;
2675 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2676 if (*cached)
2677 return (*cached)->cost;
2679 *cached = xmalloc (sizeof (struct mbc_entry));
2680 (*cached)->mode = mode;
2681 (*cached)->cst = cst;
2683 start_sequence ();
2684 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2685 NULL_RTX, 0);
2686 seq = get_insns ();
2687 end_sequence ();
2689 cost = seq_cost (seq);
2691 if (dump_file && (dump_flags & TDF_DETAILS))
2692 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2693 (int) cst, GET_MODE_NAME (mode), cost);
2695 (*cached)->cost = cost;
2697 return cost;
2700 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2701 If SYMBOL_PRESENT is false, the symbol is omitted. If VAR_PRESENT is false,
2702 the variable is omitted. The addresses are created in Pmode.
2704 TODO -- there must be some better way. This all is quite crude. */
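/* For instance, on a target with base + index * scale + displacement
   addressing (such as i386), an access like p->a[i] would typically have
   SYMBOL_PRESENT false, VAR_PRESENT true (the base pointer), a small OFFSET
   (the field displacement) and RATIO equal to the element size; the static
   tables built below record which of these shapes the target accepts and
   what they cost.  */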
2706 static unsigned
2707 get_address_cost (bool symbol_present, bool var_present,
2708 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2710 #define MAX_RATIO 128
2711 static sbitmap valid_mult;
2712 static HOST_WIDE_INT rat, off;
2713 static HOST_WIDE_INT min_offset, max_offset;
2714 static unsigned costs[2][2][2][2];
2715 unsigned cost, acost;
2716 rtx seq, addr, base;
2717 bool offset_p, ratio_p;
2718 rtx reg1;
2719 HOST_WIDE_INT s_offset;
2720 unsigned HOST_WIDE_INT mask;
2721 unsigned bits;
2723 if (!valid_mult)
2725 HOST_WIDE_INT i;
2727 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2729 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2730 for (i = 1; i <= 1 << 20; i <<= 1)
2732 XEXP (addr, 1) = GEN_INT (i);
2733 if (!memory_address_p (Pmode, addr))
2734 break;
2736 max_offset = i >> 1;
2737 off = max_offset;
2739 for (i = 1; i <= 1 << 20; i <<= 1)
2741 XEXP (addr, 1) = GEN_INT (-i);
2742 if (!memory_address_p (Pmode, addr))
2743 break;
2745 min_offset = -(i >> 1);
2747 if (dump_file && (dump_flags & TDF_DETAILS))
2749 fprintf (dump_file, "get_address_cost:\n");
2750 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2751 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2754 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2755 sbitmap_zero (valid_mult);
2756 rat = 1;
2757 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2758 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2760 XEXP (addr, 1) = GEN_INT (i);
2761 if (memory_address_p (Pmode, addr))
2763 SET_BIT (valid_mult, i + MAX_RATIO);
2764 rat = i;
2768 if (dump_file && (dump_flags & TDF_DETAILS))
2770 fprintf (dump_file, " allowed multipliers:");
2771 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2772 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2773 fprintf (dump_file, " %d", (int) i);
2774 fprintf (dump_file, "\n");
2775 fprintf (dump_file, "\n");
2779 bits = GET_MODE_BITSIZE (Pmode);
2780 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2781 offset &= mask;
2782 if ((offset >> (bits - 1) & 1))
2783 offset |= ~mask;
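  /* The masking above sign-extends OFFSET to a HOST_WIDE_INT; e.g. with a
     32-bit Pmode, an offset of 0xfffffff0 becomes -16.  */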
2784 s_offset = offset;
2786 cost = 0;
2787 offset_p = (s_offset != 0
2788 && min_offset <= s_offset && s_offset <= max_offset);
2789 ratio_p = (ratio != 1
2790 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2791 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2793 if (ratio != 1 && !ratio_p)
2794 cost += multiply_by_cost (ratio, Pmode);
2796 if (s_offset && !offset_p && !symbol_present)
2798 cost += add_cost (Pmode);
2799 var_present = true;
2802 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2803 if (!acost)
2805 acost = 0;
2807 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2808 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2809 if (ratio_p)
2810 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2812 if (var_present)
2813 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2815 if (symbol_present)
2817 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2818 if (offset_p)
2819 base = gen_rtx_fmt_e (CONST, Pmode,
2820 gen_rtx_fmt_ee (PLUS, Pmode,
2821 base,
2822 GEN_INT (off)));
2824 else if (offset_p)
2825 base = GEN_INT (off);
2826 else
2827 base = NULL_RTX;
2829 if (base)
2830 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2832 start_sequence ();
2833 addr = memory_address (Pmode, addr);
2834 seq = get_insns ();
2835 end_sequence ();
2837 acost = seq_cost (seq);
2838 acost += address_cost (addr, Pmode);
2840 if (!acost)
2841 acost = 1;
2842 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2845 return cost + acost;
2848 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2849 the bitmap in which we should store them. */
2851 static struct ivopts_data *fd_ivopts_data;
2852 static tree
2853 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2855 bitmap *depends_on = data;
2856 struct version_info *info;
2858 if (TREE_CODE (*expr_p) != SSA_NAME)
2859 return NULL_TREE;
2860 info = name_info (fd_ivopts_data, *expr_p);
2862 if (!info->inv_id || info->has_nonlin_use)
2863 return NULL_TREE;
2865 if (!*depends_on)
2866 *depends_on = BITMAP_ALLOC (NULL);
2867 bitmap_set_bit (*depends_on, info->inv_id);
2869 return NULL_TREE;
2872 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
2873 invariants the computation depends on. */
2875 static unsigned
2876 force_var_cost (struct ivopts_data *data,
2877 tree expr, bitmap *depends_on)
2879 static bool costs_initialized = false;
2880 static unsigned integer_cost;
2881 static unsigned symbol_cost;
2882 static unsigned address_cost;
2883 tree op0, op1;
2884 unsigned cost0, cost1, cost;
2885 enum machine_mode mode;
2887 if (!costs_initialized)
2889 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2890 rtx x = gen_rtx_MEM (DECL_MODE (var),
2891 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2892 tree addr;
2893 tree type = build_pointer_type (integer_type_node);
2895 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2896 2000));
2898 SET_DECL_RTL (var, x);
2899 TREE_STATIC (var) = 1;
2900 addr = build1 (ADDR_EXPR, type, var);
2901 symbol_cost = computation_cost (addr) + 1;
2903 address_cost
2904 = computation_cost (build2 (PLUS_EXPR, type,
2905 addr,
2906 build_int_cst_type (type, 2000))) + 1;
2907 if (dump_file && (dump_flags & TDF_DETAILS))
2909 fprintf (dump_file, "force_var_cost:\n");
2910 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2911 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2912 fprintf (dump_file, " address %d\n", (int) address_cost);
2913 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2914 fprintf (dump_file, "\n");
2917 costs_initialized = true;
2920 STRIP_NOPS (expr);
2922 if (depends_on)
2924 fd_ivopts_data = data;
2925 walk_tree (&expr, find_depends, depends_on, NULL);
2928 if (SSA_VAR_P (expr))
2929 return 0;
2931 if (TREE_INVARIANT (expr))
2933 if (TREE_CODE (expr) == INTEGER_CST)
2934 return integer_cost;
2936 if (TREE_CODE (expr) == ADDR_EXPR)
2938 tree obj = TREE_OPERAND (expr, 0);
2940 if (TREE_CODE (obj) == VAR_DECL
2941 || TREE_CODE (obj) == PARM_DECL
2942 || TREE_CODE (obj) == RESULT_DECL)
2943 return symbol_cost;
2946 return address_cost;
2949 switch (TREE_CODE (expr))
2951 case PLUS_EXPR:
2952 case MINUS_EXPR:
2953 case MULT_EXPR:
2954 op0 = TREE_OPERAND (expr, 0);
2955 op1 = TREE_OPERAND (expr, 1);
2956 STRIP_NOPS (op0);
2957 STRIP_NOPS (op1);
2959 if (is_gimple_val (op0))
2960 cost0 = 0;
2961 else
2962 cost0 = force_var_cost (data, op0, NULL);
2964 if (is_gimple_val (op1))
2965 cost1 = 0;
2966 else
2967 cost1 = force_var_cost (data, op1, NULL);
2969 break;
2971 default:
2972 /* Just an arbitrary value, FIXME. */
2973 return target_spill_cost;
2976 mode = TYPE_MODE (TREE_TYPE (expr));
2977 switch (TREE_CODE (expr))
2979 case PLUS_EXPR:
2980 case MINUS_EXPR:
2981 cost = add_cost (mode);
2982 break;
2984 case MULT_EXPR:
2985 if (cst_and_fits_in_hwi (op0))
2986 cost = multiply_by_cost (int_cst_value (op0), mode);
2987 else if (cst_and_fits_in_hwi (op1))
2988 cost = multiply_by_cost (int_cst_value (op1), mode);
2989 else
2990 return target_spill_cost;
2991 break;
2993 default:
2994 gcc_unreachable ();
2997 cost += cost0;
2998 cost += cost1;
3000 /* Bound the cost by target_spill_cost. The parts of complicated
3001 computations often are either loop invariant or at least can
3002 be shared between several iv uses, so letting this grow without
3003 limits would not give reasonable results. */
3004 return cost < target_spill_cost ? cost : target_spill_cost;
3007 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3008 value of the offset is added to OFFSET; SYMBOL_PRESENT and VAR_PRESENT are set
3009 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3010 invariants the computation depends on. */
3012 static unsigned
3013 split_address_cost (struct ivopts_data *data,
3014 tree addr, bool *symbol_present, bool *var_present,
3015 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3017 tree core;
3018 HOST_WIDE_INT bitsize;
3019 HOST_WIDE_INT bitpos;
3020 tree toffset;
3021 enum machine_mode mode;
3022 int unsignedp, volatilep;
3024 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3025 &unsignedp, &volatilep, false);
3027 if (toffset != 0
3028 || bitpos % BITS_PER_UNIT != 0
3029 || TREE_CODE (core) != VAR_DECL)
3031 *symbol_present = false;
3032 *var_present = true;
3033 fd_ivopts_data = data;
3034 walk_tree (&addr, find_depends, depends_on, NULL);
3035 return target_spill_cost;
3038 *offset += bitpos / BITS_PER_UNIT;
3039 if (TREE_STATIC (core)
3040 || DECL_EXTERNAL (core))
3042 *symbol_present = true;
3043 *var_present = false;
3044 return 0;
3047 *symbol_present = false;
3048 *var_present = true;
3049 return 0;
3052 /* Estimates cost of expressing difference of addresses E1 - E2 as
3053 var + symbol + offset. The value of the offset is added to OFFSET;
3054 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3055 part is missing. DEPENDS_ON is a set of the invariants the computation
3056 depends on. */
3058 static unsigned
3059 ptr_difference_cost (struct ivopts_data *data,
3060 tree e1, tree e2, bool *symbol_present, bool *var_present,
3061 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3063 HOST_WIDE_INT diff = 0;
3064 unsigned cost;
3066 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3068 if (ptr_difference_const (e1, e2, &diff))
3070 *offset += diff;
3071 *symbol_present = false;
3072 *var_present = false;
3073 return 0;
3076 if (e2 == integer_zero_node)
3077 return split_address_cost (data, TREE_OPERAND (e1, 0),
3078 symbol_present, var_present, offset, depends_on);
3080 *symbol_present = false;
3081 *var_present = true;
3083 cost = force_var_cost (data, e1, depends_on);
3084 cost += force_var_cost (data, e2, depends_on);
3085 cost += add_cost (Pmode);
3087 return cost;
3090 /* Estimates cost of expressing difference E1 - E2 as
3091 var + symbol + offset. The value of the offset is added to OFFSET;
3092 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3093 part is missing. DEPENDS_ON is a set of the invariants the computation
3094 depends on. */
3096 static unsigned
3097 difference_cost (struct ivopts_data *data,
3098 tree e1, tree e2, bool *symbol_present, bool *var_present,
3099 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3101 unsigned cost;
3102 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3103 unsigned HOST_WIDE_INT off1, off2;
3105 e1 = strip_offset (e1, false, &off1);
3106 e2 = strip_offset (e2, false, &off2);
3107 *offset += off1 - off2;
3109 STRIP_NOPS (e1);
3110 STRIP_NOPS (e2);
3112 if (TREE_CODE (e1) == ADDR_EXPR)
3113 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3114 depends_on);
3115 *symbol_present = false;
3117 if (operand_equal_p (e1, e2, 0))
3119 *var_present = false;
3120 return 0;
3122 *var_present = true;
3123 if (zero_p (e2))
3124 return force_var_cost (data, e1, depends_on);
3126 if (zero_p (e1))
3128 cost = force_var_cost (data, e2, depends_on);
3129 cost += multiply_by_cost (-1, mode);
3131 return cost;
3134 cost = force_var_cost (data, e1, depends_on);
3135 cost += force_var_cost (data, e2, depends_on);
3136 cost += add_cost (mode);
3138 return cost;
3141 /* Determines the cost of the computation by which USE is expressed
3142 from induction variable CAND. If ADDRESS_P is true, we just need
3143 to create an address from it, otherwise we want to get it into
3144 a register. A set of invariants we depend on is stored in
3145 DEPENDS_ON. AT is the statement at which the value is computed. */
3147 static unsigned
3148 get_computation_cost_at (struct ivopts_data *data,
3149 struct iv_use *use, struct iv_cand *cand,
3150 bool address_p, bitmap *depends_on, tree at)
3152 tree ubase = use->iv->base, ustep = use->iv->step;
3153 tree cbase, cstep;
3154 tree utype = TREE_TYPE (ubase), ctype;
3155 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3156 HOST_WIDE_INT ratio, aratio;
3157 bool var_present, symbol_present;
3158 unsigned cost = 0, n_sums;
3160 *depends_on = NULL;
3162 /* Only consider real candidates. */
3163 if (!cand->iv)
3164 return INFTY;
3166 cbase = cand->iv->base;
3167 cstep = cand->iv->step;
3168 ctype = TREE_TYPE (cbase);
3170 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3172 /* We do not have enough precision to express the values of the use. */
3173 return INFTY;
3176 if (address_p)
3178 /* Do not try to express the address of an object with a computation based
3179 on the address of a different object. This may cause problems in
3180 RTL-level alias analysis (which does not expect this to happen, as it
3181 is invalid in C), and would be unlikely to be useful
3182 anyway. */
3183 if (use->iv->base_object
3184 && cand->iv->base_object
3185 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3186 return INFTY;
3189 if (!cst_and_fits_in_hwi (ustep)
3190 || !cst_and_fits_in_hwi (cstep))
3191 return INFTY;
3193 if (TREE_CODE (ubase) == INTEGER_CST
3194 && !cst_and_fits_in_hwi (ubase))
3195 goto fallback;
3197 if (TREE_CODE (cbase) == INTEGER_CST
3198 && !cst_and_fits_in_hwi (cbase))
3199 goto fallback;
3201 ustepi = int_cst_value (ustep);
3202 cstepi = int_cst_value (cstep);
3204 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3206 /* TODO -- add direct handling of this case. */
3207 goto fallback;
3210 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3211 return INFTY;
3213 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3214 or ratio == 1, it is better to handle this like
3216 ubase - ratio * cbase + ratio * var
3218 (this also holds in the case ratio == -1, TODO). */
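  /* For instance, with cbase the constant 8 and ratio 2, the constant part
     -16 is folded into the offset below and only ubase remains to be costed.  */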
3220 if (TREE_CODE (cbase) == INTEGER_CST)
3222 offset = - ratio * int_cst_value (cbase);
3223 cost += difference_cost (data,
3224 ubase, integer_zero_node,
3225 &symbol_present, &var_present, &offset,
3226 depends_on);
3228 else if (ratio == 1)
3230 cost += difference_cost (data,
3231 ubase, cbase,
3232 &symbol_present, &var_present, &offset,
3233 depends_on);
3235 else
3237 cost += force_var_cost (data, cbase, depends_on);
3238 cost += add_cost (TYPE_MODE (ctype));
3239 cost += difference_cost (data,
3240 ubase, integer_zero_node,
3241 &symbol_present, &var_present, &offset,
3242 depends_on);
3245 /* If we are after the increment, the value of the candidate is higher by
3246 one iteration. */
3247 if (stmt_after_increment (data->current_loop, cand, at))
3248 offset -= ratio * cstepi;
3250 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3251 (symbol/var/const parts may be omitted). If we are looking for an address,
3252 find the cost of addressing this. */
3253 if (address_p)
3254 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3256 /* Otherwise estimate the costs for computing the expression. */
3257 aratio = ratio > 0 ? ratio : -ratio;
3258 if (!symbol_present && !var_present && !offset)
3260 if (ratio != 1)
3261 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3263 return cost;
3266 if (aratio != 1)
3267 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3269 n_sums = 1;
3270 if (var_present
3271 /* Symbol + offset should be compile-time computable. */
3272 && (symbol_present || offset))
3273 n_sums++;
3275 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3277 fallback:
3279 /* Just get the expression, expand it and measure the cost. */
3280 tree comp = get_computation_at (data->current_loop, use, cand, at);
3282 if (!comp)
3283 return INFTY;
3285 if (address_p)
3286 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3288 return computation_cost (comp);
3292 /* Determines the cost of the computation by which USE is expressed
3293 from induction variable CAND. If ADDRESS_P is true, we just need
3294 to create an address from it, otherwise we want to get it into
3295 a register. A set of invariants we depend on is stored in
3296 DEPENDS_ON. */
3298 static unsigned
3299 get_computation_cost (struct ivopts_data *data,
3300 struct iv_use *use, struct iv_cand *cand,
3301 bool address_p, bitmap *depends_on)
3303 return get_computation_cost_at (data,
3304 use, cand, address_p, depends_on, use->stmt);
3307 /* Determines cost of basing replacement of USE on CAND in a generic
3308 expression. */
3310 static bool
3311 determine_use_iv_cost_generic (struct ivopts_data *data,
3312 struct iv_use *use, struct iv_cand *cand)
3314 bitmap depends_on;
3315 unsigned cost;
3317 /* The simple case first -- if we need to express the value of the preserved
3318 original biv, the cost is 0. This also prevents us from counting the
3319 cost of increment twice -- once at this use and once in the cost of
3320 the candidate. */
3321 if (cand->pos == IP_ORIGINAL
3322 && cand->incremented_at == use->stmt)
3324 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3325 return true;
3328 cost = get_computation_cost (data, use, cand, false, &depends_on);
3329 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3331 return cost != INFTY;
3334 /* Determines cost of basing replacement of USE on CAND in an address. */
3336 static bool
3337 determine_use_iv_cost_address (struct ivopts_data *data,
3338 struct iv_use *use, struct iv_cand *cand)
3340 bitmap depends_on;
3341 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3343 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3345 return cost != INFTY;
3348 /* Computes value of induction variable IV in iteration NITER. */
3350 static tree
3351 iv_value (struct iv *iv, tree niter)
3353 tree val;
3354 tree type = TREE_TYPE (iv->base);
3356 niter = fold_convert (type, niter);
3357 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3359 return fold (build2 (PLUS_EXPR, type, iv->base, val));
3362 /* Computes value of candidate CAND at position AT in iteration NITER. */
3364 static tree
3365 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3367 tree val = iv_value (cand->iv, niter);
3368 tree type = TREE_TYPE (cand->iv->base);
3370 if (stmt_after_increment (loop, cand, at))
3371 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3373 return val;
3376 /* Returns period of induction variable iv. */
3378 static tree
3379 iv_period (struct iv *iv)
3381 tree step = iv->step, period, type;
3382 tree pow2div;
3384 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3386 /* The period of the iv is the type range divided by gcd (step, type range).
3387 Since the type range is a power of two, it suffices to determine the
3388 maximum power of two that divides step. */
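  /* For example, a 32-bit iv with step 12 has num_ending_zeros (step) == 2,
     so the period computed below is the low-bits mask of 32 - 2 = 30 bits,
     i.e. 2^30 - 1 -- the iv is guaranteed not to return to the same value
     for at least that many iterations.  */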
3389 pow2div = num_ending_zeros (step);
3390 type = unsigned_type_for (TREE_TYPE (step));
3392 period = build_low_bits_mask (type,
3393 (TYPE_PRECISION (type)
3394 - tree_low_cst (pow2div, 1)));
3396 return period;
3399 /* Returns the comparison operator used when eliminating the iv USE. */
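/* In other words: if the exit edge is the true edge of the original condition,
   the replacement test is VAR == BOUND (the loop is left exactly when the
   candidate reaches the bound); otherwise it is VAR != BOUND.  */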
3401 static enum tree_code
3402 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3404 struct loop *loop = data->current_loop;
3405 basic_block ex_bb;
3406 edge exit;
3408 ex_bb = bb_for_stmt (use->stmt);
3409 exit = EDGE_SUCC (ex_bb, 0);
3410 if (flow_bb_inside_loop_p (loop, exit->dest))
3411 exit = EDGE_SUCC (ex_bb, 1);
3413 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3416 /* Checks whether it is possible to express the condition in USE by a comparison
3417 of candidate CAND. If so, stores the value to be compared against to *BOUND. */
3419 static bool
3420 may_eliminate_iv (struct ivopts_data *data,
3421 struct iv_use *use, struct iv_cand *cand, tree *bound)
3423 basic_block ex_bb;
3424 edge exit;
3425 struct tree_niter_desc *niter;
3426 tree nit, nit_type;
3427 tree wider_type, period, per_type;
3428 struct loop *loop = data->current_loop;
3430 /* For now this works only for exits that dominate the loop latch. TODO --
3431 extend it to other conditions inside the loop body. */
3432 ex_bb = bb_for_stmt (use->stmt);
3433 if (use->stmt != last_stmt (ex_bb)
3434 || TREE_CODE (use->stmt) != COND_EXPR)
3435 return false;
3436 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3437 return false;
3439 exit = EDGE_SUCC (ex_bb, 0);
3440 if (flow_bb_inside_loop_p (loop, exit->dest))
3441 exit = EDGE_SUCC (ex_bb, 1);
3442 if (flow_bb_inside_loop_p (loop, exit->dest))
3443 return false;
3445 niter = niter_for_exit (data, exit);
3446 if (!niter
3447 || !zero_p (niter->may_be_zero))
3448 return false;
3450 nit = niter->niter;
3451 nit_type = TREE_TYPE (nit);
3453 /* Determine whether we may use the variable to test whether niter iterations
3454 elapsed. This is the case iff the period of the induction variable is
3455 greater than the number of iterations. */
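  /* For example, an unsigned 32-bit candidate with step 1 has period
     2^32 - 1, so it can safely be used to test the exit condition of any
     loop known to iterate fewer times than that.  */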
3456 period = iv_period (cand->iv);
3457 if (!period)
3458 return false;
3459 per_type = TREE_TYPE (period);
3461 wider_type = TREE_TYPE (period);
3462 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3463 wider_type = per_type;
3464 else
3465 wider_type = nit_type;
3467 if (!integer_nonzerop (fold (build2 (GE_EXPR, boolean_type_node,
3468 fold_convert (wider_type, period),
3469 fold_convert (wider_type, nit)))))
3470 return false;
3472 *bound = cand_value_at (loop, cand, use->stmt, nit);
3473 return true;
3476 /* Determines cost of basing replacement of USE on CAND in a condition. */
3478 static bool
3479 determine_use_iv_cost_condition (struct ivopts_data *data,
3480 struct iv_use *use, struct iv_cand *cand)
3482 tree bound = NULL_TREE, op, cond;
3483 bitmap depends_on = NULL;
3484 unsigned cost;
3486 /* Only consider real candidates. */
3487 if (!cand->iv)
3489 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
3490 return false;
3493 if (may_eliminate_iv (data, use, cand, &bound))
3495 cost = force_var_cost (data, bound, &depends_on);
3497 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3498 return cost != INFTY;
3501 /* The induction variable elimination failed; just express the original
3502 giv. If it is compared with an invariant, note that we cannot get
3503 rid of it. */
3504 cost = get_computation_cost (data, use, cand, false, &depends_on);
3506 cond = *use->op_p;
3507 if (TREE_CODE (cond) != SSA_NAME)
3509 op = TREE_OPERAND (cond, 0);
3510 if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
3511 op = TREE_OPERAND (cond, 1);
3512 if (TREE_CODE (op) == SSA_NAME)
3514 op = get_iv (data, op)->base;
3515 fd_ivopts_data = data;
3516 walk_tree (&op, find_depends, &depends_on, NULL);
3520 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
3521 return cost != INFTY;
3524 /* Checks whether it is possible to replace the final value of USE by
3525 a direct computation. If so, the formula is stored to *VALUE. */
3527 static bool
3528 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
3529 tree *value)
3531 struct loop *loop = data->current_loop;
3532 edge exit;
3533 struct tree_niter_desc *niter;
3535 exit = single_dom_exit (loop);
3536 if (!exit)
3537 return false;
3539 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3540 bb_for_stmt (use->stmt)));
3542 niter = niter_for_single_dom_exit (data);
3543 if (!niter
3544 || !zero_p (niter->may_be_zero))
3545 return false;
3547 *value = iv_value (use->iv, niter->niter);
3549 return true;
3552 /* Determines cost of replacing final value of USE using CAND. */
3554 static bool
3555 determine_use_iv_cost_outer (struct ivopts_data *data,
3556 struct iv_use *use, struct iv_cand *cand)
3558 bitmap depends_on;
3559 unsigned cost;
3560 edge exit;
3561 tree value = NULL_TREE;
3562 struct loop *loop = data->current_loop;
3564 /* The simple case first -- if we need to express the value of the preserved
3565 original biv, the cost is 0. This also prevents us from counting the
3566 cost of increment twice -- once at this use and once in the cost of
3567 the candidate. */
3568 if (cand->pos == IP_ORIGINAL
3569 && cand->incremented_at == use->stmt)
3571 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3572 return true;
3575 if (!cand->iv)
3577 if (!may_replace_final_value (data, use, &value))
3579 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
3580 return false;
3583 depends_on = NULL;
3584 cost = force_var_cost (data, value, &depends_on);
3586 cost /= AVG_LOOP_NITER (loop);
3588 set_use_iv_cost (data, use, cand, cost, depends_on, value);
3589 return cost != INFTY;
3592 exit = single_dom_exit (loop);
3593 if (exit)
3595 /* If there is just a single exit, we may use the value of the candidate
3596 when the exit is taken to determine the value of the use. */
3597 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3598 last_stmt (exit->src));
3599 if (cost != INFTY)
3600 cost /= AVG_LOOP_NITER (loop);
3602 else
3604 /* Otherwise we just need to compute the iv. */
3605 cost = get_computation_cost (data, use, cand, false, &depends_on);
3608 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3610 return cost != INFTY;
3613 /* Determines cost of basing replacement of USE on CAND. Returns false
3614 if USE cannot be based on CAND. */
3616 static bool
3617 determine_use_iv_cost (struct ivopts_data *data,
3618 struct iv_use *use, struct iv_cand *cand)
3620 switch (use->type)
3622 case USE_NONLINEAR_EXPR:
3623 return determine_use_iv_cost_generic (data, use, cand);
3625 case USE_OUTER:
3626 return determine_use_iv_cost_outer (data, use, cand);
3628 case USE_ADDRESS:
3629 return determine_use_iv_cost_address (data, use, cand);
3631 case USE_COMPARE:
3632 return determine_use_iv_cost_condition (data, use, cand);
3634 default:
3635 gcc_unreachable ();
3639 /* Determines the costs of basing the uses on the iv candidates. */
3641 static void
3642 determine_use_iv_costs (struct ivopts_data *data)
3644 unsigned i, j;
3645 struct iv_use *use;
3646 struct iv_cand *cand;
3647 bitmap to_clear = BITMAP_ALLOC (NULL);
3649 alloc_use_cost_map (data);
3651 for (i = 0; i < n_iv_uses (data); i++)
3653 use = iv_use (data, i);
3655 if (data->consider_all_candidates)
3657 for (j = 0; j < n_iv_cands (data); j++)
3659 cand = iv_cand (data, j);
3660 determine_use_iv_cost (data, use, cand);
3663 else
3665 bitmap_iterator bi;
3667 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3669 cand = iv_cand (data, j);
3670 if (!determine_use_iv_cost (data, use, cand))
3671 bitmap_set_bit (to_clear, j);
3674 /* Remove the candidates for which the cost is infinite from
3675 the list of related candidates. */
3676 bitmap_and_compl_into (use->related_cands, to_clear);
3677 bitmap_clear (to_clear);
3681 BITMAP_FREE (to_clear);
3683 if (dump_file && (dump_flags & TDF_DETAILS))
3685 fprintf (dump_file, "Use-candidate costs:\n");
3687 for (i = 0; i < n_iv_uses (data); i++)
3689 use = iv_use (data, i);
3691 fprintf (dump_file, "Use %d:\n", i);
3692 fprintf (dump_file, " cand\tcost\tdepends on\n");
3693 for (j = 0; j < use->n_map_members; j++)
3695 if (!use->cost_map[j].cand
3696 || use->cost_map[j].cost == INFTY)
3697 continue;
3699 fprintf (dump_file, " %d\t%d\t",
3700 use->cost_map[j].cand->id,
3701 use->cost_map[j].cost);
3702 if (use->cost_map[j].depends_on)
3703 bitmap_print (dump_file,
3704 use->cost_map[j].depends_on, "","");
3705 fprintf (dump_file, "\n");
3708 fprintf (dump_file, "\n");
3710 fprintf (dump_file, "\n");
3714 /* Determines cost of the candidate CAND. */
3716 static void
3717 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3719 unsigned cost_base, cost_step;
3720 tree base;
3722 if (!cand->iv)
3724 cand->cost = 0;
3725 return;
3728 /* There are two costs associated with the candidate -- its increment
3729 and its initialization. The second is almost negligible for any loop
3730 that rolls enough, so we take it into account only slightly. */
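  /* E.g. if forcing the base into a register costs 4 and an addition costs 1,
     the candidate cost below is 1 + 4 / AVG_LOOP_NITER -- the initialization
     is amortized over the expected number of iterations.  */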
3732 base = cand->iv->base;
3733 cost_base = force_var_cost (data, base, NULL);
3734 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3736 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3738 /* Prefer the original iv unless we may gain something by replacing it;
3739 this is not really relevant for artificial ivs created by other
3740 passes. */
3741 if (cand->pos == IP_ORIGINAL
3742 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3743 cand->cost--;
3745 /* Prefer not to insert statements into latch unless there are some
3746 already (so that we do not create unnecessary jumps). */
3747 if (cand->pos == IP_END
3748 && empty_block_p (ip_end_pos (data->current_loop)))
3749 cand->cost++;
3752 /* Determines costs of computation of the candidates. */
3754 static void
3755 determine_iv_costs (struct ivopts_data *data)
3757 unsigned i;
3759 if (dump_file && (dump_flags & TDF_DETAILS))
3761 fprintf (dump_file, "Candidate costs:\n");
3762 fprintf (dump_file, " cand\tcost\n");
3765 for (i = 0; i < n_iv_cands (data); i++)
3767 struct iv_cand *cand = iv_cand (data, i);
3769 determine_iv_cost (data, cand);
3771 if (dump_file && (dump_flags & TDF_DETAILS))
3772 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3775 if (dump_file && (dump_flags & TDF_DETAILS))
3776 fprintf (dump_file, "\n");
3779 /* Calculates cost for having SIZE induction variables. */
3781 static unsigned
3782 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3784 return global_cost_for_size (size,
3785 loop_data (data->current_loop)->regs_used,
3786 n_iv_uses (data));
3789 /* For each size of the induction variable set determine the penalty. */
3791 static void
3792 determine_set_costs (struct ivopts_data *data)
3794 unsigned j, n;
3795 tree phi, op;
3796 struct loop *loop = data->current_loop;
3797 bitmap_iterator bi;
3799 /* We use the following model (definitely improvable, especially the
3800 cost function -- TODO):
3802 We estimate the number of registers available (using MD data), name it A.
3804 We estimate the number of registers used by the loop, name it U. This
3805 number is obtained as the number of loop phi nodes (not counting virtual
3806 registers and bivs) + the number of variables from outside of the loop.
3808 We set a reserve R (free regs that are used for temporary computations,
3809 etc.). For now the reserve is a constant 3.
3811 Let I be the number of induction variables.
3813 -- if U + I + R <= A, the cost is I * SMALL_COST (just so that we are not
3814 encouraged to create a lot of ivs without a reason).
3815 -- if A - R < U + I <= A, the cost is I * PRES_COST
3816 -- if U + I > A, the cost is I * PRES_COST and
3817 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
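  /* A worked example with purely illustrative numbers: with A = 6, R = 3 and
     U = 2, a set of one iv satisfies U + I + R <= A and costs just SMALL_COST;
     three ivs fall into the middle band and cost 3 * PRES_COST; five ivs make
     U + I exceed A, so spill costs proportional to
     (U + I - A) / (U + I) = 1/7 are added on top.  */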
3819 if (dump_file && (dump_flags & TDF_DETAILS))
3821 fprintf (dump_file, "Global costs:\n");
3822 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3823 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3824 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3825 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3828 n = 0;
3829 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3831 op = PHI_RESULT (phi);
3833 if (!is_gimple_reg (op))
3834 continue;
3836 if (get_iv (data, op))
3837 continue;
3839 n++;
3842 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3844 struct version_info *info = ver_info (data, j);
3846 if (info->inv_id && info->has_nonlin_use)
3847 n++;
3850 loop_data (loop)->regs_used = n;
3851 if (dump_file && (dump_flags & TDF_DETAILS))
3852 fprintf (dump_file, " regs_used %d\n", n);
3854 if (dump_file && (dump_flags & TDF_DETAILS))
3856 fprintf (dump_file, " cost for size:\n");
3857 fprintf (dump_file, " ivs\tcost\n");
3858 for (j = 0; j <= 2 * target_avail_regs; j++)
3859 fprintf (dump_file, " %d\t%d\n", j,
3860 ivopts_global_cost_for_size (data, j));
3861 fprintf (dump_file, "\n");
3865 /* Returns true if A is a cheaper cost pair than B. */
3867 static bool
3868 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3870 if (!a)
3871 return false;
3873 if (!b)
3874 return true;
3876 if (a->cost < b->cost)
3877 return true;
3879 if (a->cost > b->cost)
3880 return false;
3882 /* In case the costs are the same, prefer the cheaper candidate. */
3883 if (a->cand->cost < b->cand->cost)
3884 return true;
3886 return false;
3889 /* Computes the cost field of IVS structure. */
3891 static void
3892 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
3894 unsigned cost = 0;
3896 cost += ivs->cand_use_cost;
3897 cost += ivs->cand_cost;
3898 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
3900 ivs->cost = cost;
3903 /* Set USE not to be expressed by any candidate in IVS. */
3905 static void
3906 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
3907 struct iv_use *use)
3909 unsigned uid = use->id, cid, iid;
3910 bitmap deps;
3911 struct cost_pair *cp;
3912 bitmap_iterator bi;
3914 cp = ivs->cand_for_use[uid];
3915 if (!cp)
3916 return;
3917 cid = cp->cand->id;
3919 ivs->bad_uses++;
3920 ivs->cand_for_use[uid] = NULL;
3921 ivs->n_cand_uses[cid]--;
3923 if (ivs->n_cand_uses[cid] == 0)
3925 bitmap_clear_bit (ivs->cands, cid);
3926 /* Do not count the pseudocandidates. */
3927 if (cp->cand->iv)
3928 ivs->n_regs--;
3929 ivs->n_cands--;
3930 ivs->cand_cost -= cp->cand->cost;
3933 ivs->cand_use_cost -= cp->cost;
3935 deps = cp->depends_on;
3937 if (deps)
3939 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3941 ivs->n_invariant_uses[iid]--;
3942 if (ivs->n_invariant_uses[iid] == 0)
3943 ivs->n_regs--;
3947 iv_ca_recount_cost (data, ivs);
3950 /* Set cost pair for USE in set IVS to CP. */
3952 static void
3953 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
3954 struct iv_use *use, struct cost_pair *cp)
3956 unsigned uid = use->id, cid, iid;
3957 bitmap deps;
3958 bitmap_iterator bi;
3960 if (ivs->cand_for_use[uid] == cp)
3961 return;
3963 if (ivs->cand_for_use[uid])
3964 iv_ca_set_no_cp (data, ivs, use);
3966 if (cp)
3968 cid = cp->cand->id;
3970 ivs->bad_uses--;
3971 ivs->cand_for_use[uid] = cp;
3972 ivs->n_cand_uses[cid]++;
3973 if (ivs->n_cand_uses[cid] == 1)
3975 bitmap_set_bit (ivs->cands, cid);
3976 /* Do not count the pseudocandidates. */
3977 if (cp->cand->iv)
3978 ivs->n_regs++;
3979 ivs->n_cands++;
3980 ivs->cand_cost += cp->cand->cost;
3983 ivs->cand_use_cost += cp->cost;
3985 deps = cp->depends_on;
3987 if (deps)
3989 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3991 ivs->n_invariant_uses[iid]++;
3992 if (ivs->n_invariant_uses[iid] == 1)
3993 ivs->n_regs++;
3997 iv_ca_recount_cost (data, ivs);
4001 /* Extend set IVS by expressing USE by some of the candidates in it
4002 if possible. */
4004 static void
4005 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4006 struct iv_use *use)
4008 struct cost_pair *best_cp = NULL, *cp;
4009 bitmap_iterator bi;
4010 unsigned i;
4012 gcc_assert (ivs->upto >= use->id);
4014 if (ivs->upto == use->id)
4016 ivs->upto++;
4017 ivs->bad_uses++;
4020 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4022 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4024 if (cheaper_cost_pair (cp, best_cp))
4025 best_cp = cp;
4028 iv_ca_set_cp (data, ivs, use, best_cp);
4031 /* Get cost for assignment IVS. */
4033 static unsigned
4034 iv_ca_cost (struct iv_ca *ivs)
4036 return (ivs->bad_uses ? INFTY : ivs->cost);
4039 /* Returns true if all dependences of CP are among invariants in IVS. */
4041 static bool
4042 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4044 unsigned i;
4045 bitmap_iterator bi;
4047 if (!cp->depends_on)
4048 return true;
4050 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4052 if (ivs->n_invariant_uses[i] == 0)
4053 return false;
4056 return true;
4059 /* Creates a change record expressing USE by NEW_CP instead of OLD_CP and
4060 chains it before NEXT_CHANGE. */
4062 static struct iv_ca_delta *
4063 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4064 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4066 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4068 change->use = use;
4069 change->old_cp = old_cp;
4070 change->new_cp = new_cp;
4071 change->next_change = next_change;
4073 return change;
4076 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4077 are rewritten. */
4079 static struct iv_ca_delta *
4080 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4082 struct iv_ca_delta *last;
4084 if (!l2)
4085 return l1;
4087 if (!l1)
4088 return l2;
4090 for (last = l1; last->next_change; last = last->next_change)
4091 continue;
4092 last->next_change = l2;
4094 return l1;
4097 /* Returns the cost pair by which USE is expressed in IVS. */
4099 static struct cost_pair *
4100 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4102 return ivs->cand_for_use[use->id];
4105 /* Reverse the list of changes DELTA, forming the inverse to it. */
4107 static struct iv_ca_delta *
4108 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4110 struct iv_ca_delta *act, *next, *prev = NULL;
4111 struct cost_pair *tmp;
4113 for (act = delta; act; act = next)
4115 next = act->next_change;
4116 act->next_change = prev;
4117 prev = act;
4119 tmp = act->old_cp;
4120 act->old_cp = act->new_cp;
4121 act->new_cp = tmp;
4124 return prev;
4127 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4128 reverted instead. */
4130 static void
4131 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4132 struct iv_ca_delta *delta, bool forward)
4134 struct cost_pair *from, *to;
4135 struct iv_ca_delta *act;
4137 if (!forward)
4138 delta = iv_ca_delta_reverse (delta);
4140 for (act = delta; act; act = act->next_change)
4142 from = act->old_cp;
4143 to = act->new_cp;
4144 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4145 iv_ca_set_cp (data, ivs, act->use, to);
4148 if (!forward)
4149 iv_ca_delta_reverse (delta);
4152 /* Returns true if CAND is used in IVS. */
4154 static bool
4155 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4157 return ivs->n_cand_uses[cand->id] > 0;
4160 /* Returns number of induction variable candidates in the set IVS. */
4162 static unsigned
4163 iv_ca_n_cands (struct iv_ca *ivs)
4165 return ivs->n_cands;
4168 /* Free the list of changes DELTA. */
4170 static void
4171 iv_ca_delta_free (struct iv_ca_delta **delta)
4173 struct iv_ca_delta *act, *next;
4175 for (act = *delta; act; act = next)
4177 next = act->next_change;
4178 free (act);
4181 *delta = NULL;
4184 /* Allocates a new iv candidate assignment. */
4186 static struct iv_ca *
4187 iv_ca_new (struct ivopts_data *data)
4189 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4191 nw->upto = 0;
4192 nw->bad_uses = 0;
4193 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4194 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4195 nw->cands = BITMAP_ALLOC (NULL);
4196 nw->n_cands = 0;
4197 nw->n_regs = 0;
4198 nw->cand_use_cost = 0;
4199 nw->cand_cost = 0;
4200 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4201 nw->cost = 0;
4203 return nw;
4206 /* Free memory occupied by the set IVS. */
4208 static void
4209 iv_ca_free (struct iv_ca **ivs)
4211 free ((*ivs)->cand_for_use);
4212 free ((*ivs)->n_cand_uses);
4213 BITMAP_FREE ((*ivs)->cands);
4214 free ((*ivs)->n_invariant_uses);
4215 free (*ivs);
4216 *ivs = NULL;
4219 /* Dumps IVS to FILE. */
4221 static void
4222 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4224 const char *pref = " invariants ";
4225 unsigned i;
4227 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4228 bitmap_print (file, ivs->cands, " candidates ","\n");
4230 for (i = 1; i <= data->max_inv_id; i++)
4231 if (ivs->n_invariant_uses[i])
4233 fprintf (file, "%s%d", pref, i);
4234 pref = ", ";
4236 fprintf (file, "\n");
4239 /* Try changing the candidate in IVS to CAND for each use. Return the cost of
4240 the new set, and store the differences in DELTA. The number of induction
4241 variables in the new set is stored to N_IVS. */
4243 static unsigned
4244 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4245 struct iv_cand *cand, struct iv_ca_delta **delta,
4246 unsigned *n_ivs)
4248 unsigned i, cost;
4249 struct iv_use *use;
4250 struct cost_pair *old_cp, *new_cp;
4252 *delta = NULL;
4253 for (i = 0; i < ivs->upto; i++)
4255 use = iv_use (data, i);
4256 old_cp = iv_ca_cand_for_use (ivs, use);
4258 if (old_cp
4259 && old_cp->cand == cand)
4260 continue;
4262 new_cp = get_use_iv_cost (data, use, cand);
4263 if (!new_cp)
4264 continue;
4266 if (!iv_ca_has_deps (ivs, new_cp))
4267 continue;
4269 if (!cheaper_cost_pair (new_cp, old_cp))
4270 continue;
4272 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4275 iv_ca_delta_commit (data, ivs, *delta, true);
4276 cost = iv_ca_cost (ivs);
4277 if (n_ivs)
4278 *n_ivs = iv_ca_n_cands (ivs);
4279 iv_ca_delta_commit (data, ivs, *delta, false);
4281 return cost;
4284 /* Try narrowing set IVS by removing CAND. Return the cost of
4285 the new set and store the differences in DELTA. */
4287 static unsigned
4288 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4289 struct iv_cand *cand, struct iv_ca_delta **delta)
4291 unsigned i, ci;
4292 struct iv_use *use;
4293 struct cost_pair *old_cp, *new_cp, *cp;
4294 bitmap_iterator bi;
4295 struct iv_cand *cnd;
4296 unsigned cost;
4298 *delta = NULL;
4299 for (i = 0; i < n_iv_uses (data); i++)
4301 use = iv_use (data, i);
4303 old_cp = iv_ca_cand_for_use (ivs, use);
4304 if (old_cp->cand != cand)
4305 continue;
4307 new_cp = NULL;
4309 if (data->consider_all_candidates)
4311 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4313 if (ci == cand->id)
4314 continue;
4316 cnd = iv_cand (data, ci);
4318 cp = get_use_iv_cost (data, use, cnd);
4319 if (!cp)
4320 continue;
4321 if (!iv_ca_has_deps (ivs, cp))
4322 continue;
4324 if (!cheaper_cost_pair (cp, new_cp))
4325 continue;
4327 new_cp = cp;
4330 else
4332 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4334 if (ci == cand->id)
4335 continue;
4337 cnd = iv_cand (data, ci);
4339 cp = get_use_iv_cost (data, use, cnd);
4340 if (!cp)
4341 continue;
4342 if (!iv_ca_has_deps (ivs, cp))
4343 continue;
4345 if (!cheaper_cost_pair (cp, new_cp))
4346 continue;
4348 new_cp = cp;
4352 if (!new_cp)
4354 iv_ca_delta_free (delta);
4355 return INFTY;
4358 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4361 iv_ca_delta_commit (data, ivs, *delta, true);
4362 cost = iv_ca_cost (ivs);
4363 iv_ca_delta_commit (data, ivs, *delta, false);
4365 return cost;
4368 /* Try optimizing the set of candidates IVS by removing candidates different
4369 from EXCEPT_CAND from it. Return the cost of the new set, and store
4370 differences in DELTA. */
4372 static unsigned
4373 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4374 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4376 bitmap_iterator bi;
4377 struct iv_ca_delta *act_delta, *best_delta;
4378 unsigned i, best_cost, acost;
4379 struct iv_cand *cand;
4381 best_delta = NULL;
4382 best_cost = iv_ca_cost (ivs);
4384 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4386 cand = iv_cand (data, i);
4388 if (cand == except_cand)
4389 continue;
4391 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4393 if (acost < best_cost)
4395 best_cost = acost;
4396 iv_ca_delta_free (&best_delta);
4397 best_delta = act_delta;
4399 else
4400 iv_ca_delta_free (&act_delta);
4403 if (!best_delta)
4405 *delta = NULL;
4406 return best_cost;
4409 /* Recurse to possibly remove other unnecessary ivs. */
4410 iv_ca_delta_commit (data, ivs, best_delta, true);
4411 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4412 iv_ca_delta_commit (data, ivs, best_delta, false);
4413 *delta = iv_ca_delta_join (best_delta, *delta);
4414 return best_cost;
4417 /* Tries to extend the set IVS in the best possible way in order
4418 to express USE. */
4420 static bool
4421 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4422 struct iv_use *use)
4424 unsigned best_cost, act_cost;
4425 unsigned i;
4426 bitmap_iterator bi;
4427 struct iv_cand *cand;
4428 struct iv_ca_delta *best_delta = NULL, *act_delta;
4429 struct cost_pair *cp;
4431 iv_ca_add_use (data, ivs, use);
4432 best_cost = iv_ca_cost (ivs);
4434 cp = iv_ca_cand_for_use (ivs, use);
4435 if (cp)
4437 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4438 iv_ca_set_no_cp (data, ivs, use);
4441 /* First try important candidates. Only if that fails, try the specific ones.
4442 Rationale -- in loops with many variables the best choice often is to use
4443 just one generic biv. If we added here many ivs specific to the uses,
4444 the optimization algorithm later would be likely to get stuck in a local
4445 minimum, thus causing us to create too many ivs. The approach from
4446 few ivs to more seems more likely to be successful -- starting from few
4447 ivs, replacing an expensive use by a specific iv should always be a
4448 win. */
4449 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4451 cand = iv_cand (data, i);
4453 if (iv_ca_cand_used_p (ivs, cand))
4454 continue;
4456 cp = get_use_iv_cost (data, use, cand);
4457 if (!cp)
4458 continue;
4460 iv_ca_set_cp (data, ivs, use, cp);
4461 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4462 iv_ca_set_no_cp (data, ivs, use);
4463 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4465 if (act_cost < best_cost)
4467 best_cost = act_cost;
4469 iv_ca_delta_free (&best_delta);
4470 best_delta = act_delta;
4472 else
4473 iv_ca_delta_free (&act_delta);
4476 if (best_cost == INFTY)
4478 for (i = 0; i < use->n_map_members; i++)
4480 cp = use->cost_map + i;
4481 cand = cp->cand;
4482 if (!cand)
4483 continue;
4485 /* Already tried this. */
4486 if (cand->important)
4487 continue;
4489 if (iv_ca_cand_used_p (ivs, cand))
4490 continue;
4492 act_delta = NULL;
4493 iv_ca_set_cp (data, ivs, use, cp);
4494 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4495 iv_ca_set_no_cp (data, ivs, use);
4496 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4497 cp, act_delta);
4499 if (act_cost < best_cost)
4501 best_cost = act_cost;
4503 if (best_delta)
4504 iv_ca_delta_free (&best_delta);
4505 best_delta = act_delta;
4507 else
4508 iv_ca_delta_free (&act_delta);
4512 iv_ca_delta_commit (data, ivs, best_delta, true);
4513 iv_ca_delta_free (&best_delta);
4515 return (best_cost != INFTY);
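/* Editorial illustration (not in the original source): the "important
   candidates first" order above matters in loops such as

     for (i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   where a single counter-like biv usually serves all three memory accesses
   and the exit test, whereas seeding the search with one specialized pointer
   iv per array tends to leave it stuck in a local minimum with several
   ivs.  */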
4518 /* Finds an initial assignment of candidates to uses. */
4520 static struct iv_ca *
4521 get_initial_solution (struct ivopts_data *data)
4523 struct iv_ca *ivs = iv_ca_new (data);
4524 unsigned i;
4526 for (i = 0; i < n_iv_uses (data); i++)
4527 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4529 iv_ca_free (&ivs);
4530 return NULL;
4533 return ivs;
4536 /* Tries to improve the set of induction variables IVS. */
4538 static bool
4539 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4541 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4542 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4543 struct iv_cand *cand;
4545 /* Try extending the set of induction variables by one. */
4546 for (i = 0; i < n_iv_cands (data); i++)
4548 cand = iv_cand (data, i);
4550 if (iv_ca_cand_used_p (ivs, cand))
4551 continue;
4553 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4554 if (!act_delta)
4555 continue;
4557 /* If we successfully added the candidate and the set is small enough,
4558 try optimizing it by removing other candidates. */
4559 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4561 iv_ca_delta_commit (data, ivs, act_delta, true);
4562 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4563 iv_ca_delta_commit (data, ivs, act_delta, false);
4564 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4567 if (acost < best_cost)
4569 best_cost = acost;
4570 iv_ca_delta_free (&best_delta);
4571 best_delta = act_delta;
4573 else
4574 iv_ca_delta_free (&act_delta);
4577 if (!best_delta)
4579 /* Try removing the candidates from the set instead. */
4580 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4582 /* Nothing more we can do. */
4583 if (!best_delta)
4584 return false;
4587 iv_ca_delta_commit (data, ivs, best_delta, true);
4588 gcc_assert (best_cost == iv_ca_cost (ivs));
4589 iv_ca_delta_free (&best_delta);
4590 return true;
4593 /* Attempts to find the optimal set of induction variables. We use a simple
4594 greedy heuristic -- we try to replace at most one candidate in the selected
4595 solution and remove the unused ivs while this improves the cost. */
4597 static struct iv_ca *
4598 find_optimal_iv_set (struct ivopts_data *data)
4600 unsigned i;
4601 struct iv_ca *set;
4602 struct iv_use *use;
4604 /* Get the initial solution. */
4605 set = get_initial_solution (data);
4606 if (!set)
4608 if (dump_file && (dump_flags & TDF_DETAILS))
4609 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4610 return NULL;
4613 if (dump_file && (dump_flags & TDF_DETAILS))
4615 fprintf (dump_file, "Initial set of candidates:\n");
4616 iv_ca_dump (data, dump_file, set);
4619 while (try_improve_iv_set (data, set))
4621 if (dump_file && (dump_flags & TDF_DETAILS))
4623 fprintf (dump_file, "Improved to:\n");
4624 iv_ca_dump (data, dump_file, set);
4628 if (dump_file && (dump_flags & TDF_DETAILS))
4629 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
4631 for (i = 0; i < n_iv_uses (data); i++)
4633 use = iv_use (data, i);
4634 use->selected = iv_ca_cand_for_use (set, use)->cand;
4637 return set;
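/* Editorial outline (not in the original source) of the search above:

     set = get_initial_solution (data);     -- pick some candidate for each use
     while (try_improve_iv_set (data, set))
       ;                                    -- add one candidate and/or prune;
                                               stop when no step lowers the cost
     record the chosen candidate in each use's "selected" field.  */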
4640 /* Creates a new induction variable corresponding to CAND. */
4642 static void
4643 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4645 block_stmt_iterator incr_pos;
4646 tree base;
4647 bool after = false;
4649 if (!cand->iv)
4650 return;
4652 switch (cand->pos)
4654 case IP_NORMAL:
4655 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4656 break;
4658 case IP_END:
4659 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4660 after = true;
4661 break;
4663 case IP_ORIGINAL:
4664 /* Mark that the iv is preserved. */
4665 name_info (data, cand->var_before)->preserve_biv = true;
4666 name_info (data, cand->var_after)->preserve_biv = true;
4668 /* Rewrite the increment so that it uses var_before directly. */
4669 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4671 return;
4674 gimple_add_tmp_var (cand->var_before);
4675 add_referenced_tmp_var (cand->var_before);
4677 base = unshare_expr (cand->iv->base);
4679 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
4680 &incr_pos, after, &cand->var_before, &cand->var_after);
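/* Editorial note (not in the original source): for a candidate with base B
   and step S, the create_iv call above conceptually emits

     # var_before = PHI <B (preheader), var_after (latch)>
     ...
     var_after = var_before + S;

   with the increment placed at INCR_POS (after it when AFTER is true).  */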
4683 /* Creates new induction variables described in SET. */
4685 static void
4686 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4688 unsigned i;
4689 struct iv_cand *cand;
4690 bitmap_iterator bi;
4692 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4694 cand = iv_cand (data, i);
4695 create_new_iv (data, cand);
4699 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4700 is true, also remove the ssa name defined by the statement. */
4702 static void
4703 remove_statement (tree stmt, bool including_defined_name)
4705 if (TREE_CODE (stmt) == PHI_NODE)
4707 if (!including_defined_name)
4709 /* Prevent the ssa name defined by the statement from being removed. */
4710 SET_PHI_RESULT (stmt, NULL);
4712 remove_phi_node (stmt, NULL_TREE);
4714 else
4716 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4718 bsi_remove (&bsi);
4722 /* Rewrites USE (definition of iv used in a nonlinear expression)
4723 using candidate CAND. */
4725 static void
4726 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4727 struct iv_use *use, struct iv_cand *cand)
4729 tree comp;
4730 tree op, stmts, tgt, ass;
4731 block_stmt_iterator bsi, pbsi;
4733 /* An important special case -- if we are asked to express the value of
4734 the original iv by itself, just exit; there is no need to
4735 introduce a new computation (which might also require casting the
4736 variable to unsigned and back). */
4737 if (cand->pos == IP_ORIGINAL
4738 && TREE_CODE (use->stmt) == MODIFY_EXPR
4739 && TREE_OPERAND (use->stmt, 0) == cand->var_after)
4741 op = TREE_OPERAND (use->stmt, 1);
4743 /* Be a bit careful. In case the variable is expressed in some
4744 complicated way, rewrite it so that we can get rid of this
4745 complicated expression. */
4746 if ((TREE_CODE (op) == PLUS_EXPR
4747 || TREE_CODE (op) == MINUS_EXPR)
4748 && TREE_OPERAND (op, 0) == cand->var_before
4749 && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
4750 return;
4753 comp = unshare_expr (get_computation (data->current_loop,
4754 use, cand));
4755 switch (TREE_CODE (use->stmt))
4757 case PHI_NODE:
4758 tgt = PHI_RESULT (use->stmt);
4760 /* If we should keep the biv, do not replace it. */
4761 if (name_info (data, tgt)->preserve_biv)
4762 return;
4764 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4765 while (!bsi_end_p (pbsi)
4766 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4768 bsi = pbsi;
4769 bsi_next (&pbsi);
4771 break;
4773 case MODIFY_EXPR:
4774 tgt = TREE_OPERAND (use->stmt, 0);
4775 bsi = bsi_for_stmt (use->stmt);
4776 break;
4778 default:
4779 gcc_unreachable ();
4782 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4784 if (TREE_CODE (use->stmt) == PHI_NODE)
4786 if (stmts)
4787 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4788 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
4789 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4790 remove_statement (use->stmt, false);
4791 SSA_NAME_DEF_STMT (tgt) = ass;
4793 else
4795 if (stmts)
4796 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4797 TREE_OPERAND (use->stmt, 1) = op;
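/* Editorial example (not in the original source; SSA names are hypothetical):
   assume the original iv i has step 1 and the selected candidate ivtmp has
   base 0 and step 4, so that ivtmp tracks 4 * i.  A nonlinear use such as

     x_5 = i_7 * 4;

   might then be rewritten via get_computation simply as

     x_5 = ivtmp_20;

   after which the original iv i can become unused and be removed.  */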
4801 /* Replaces the ssa name in index IDX by its basic variable. Callback for
4802 for_each_index. */
4804 static bool
4805 idx_remove_ssa_names (tree base, tree *idx,
4806 void *data ATTRIBUTE_UNUSED)
4808 tree *op;
4810 if (TREE_CODE (*idx) == SSA_NAME)
4811 *idx = SSA_NAME_VAR (*idx);
4813 if (TREE_CODE (base) == ARRAY_REF)
4815 op = &TREE_OPERAND (base, 2);
4816 if (*op
4817 && TREE_CODE (*op) == SSA_NAME)
4818 *op = SSA_NAME_VAR (*op);
4819 op = &TREE_OPERAND (base, 3);
4820 if (*op
4821 && TREE_CODE (*op) == SSA_NAME)
4822 *op = SSA_NAME_VAR (*op);
4825 return true;
4828 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4830 static tree
4831 unshare_and_remove_ssa_names (tree ref)
4833 ref = unshare_expr (ref);
4834 for_each_index (&ref, idx_remove_ssa_names, NULL);
4836 return ref;
4839 /* Rewrites base of memory access OP with expression WITH in statement
4840 pointed to by BSI. */
4842 static void
4843 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
4845 tree bvar, var, new_name, copy, name;
4846 tree orig;
4848 var = bvar = get_base_address (*op);
4850 if (!var || TREE_CODE (with) != SSA_NAME)
4851 goto do_rewrite;
4853 gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
4854 gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
4855 if (TREE_CODE (var) == INDIRECT_REF)
4856 var = TREE_OPERAND (var, 0);
4857 if (TREE_CODE (var) == SSA_NAME)
4859 name = var;
4860 var = SSA_NAME_VAR (var);
4862 else if (DECL_P (var))
4863 name = NULL_TREE;
4864 else
4865 goto do_rewrite;
4867 /* We need to add a memory tag for the variable. But we do not want
4868 to add it to the temporary used for the computations, since this leads
4869 to problems in redundancy elimination when there are common parts
4870 in two computations referring to different arrays. So we copy
4871 the variable to a new temporary. */
4872 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
4874 if (name)
4875 new_name = duplicate_ssa_name (name, copy);
4876 else
4878 tree tag = var_ann (var)->type_mem_tag;
4879 tree new_ptr = create_tmp_var (TREE_TYPE (with), "ruatmp");
4880 add_referenced_tmp_var (new_ptr);
4881 if (tag)
4882 var_ann (new_ptr)->type_mem_tag = tag;
4883 else
4884 add_type_alias (new_ptr, var);
4885 new_name = make_ssa_name (new_ptr, copy);
4888 TREE_OPERAND (copy, 0) = new_name;
4889 update_stmt (copy);
4890 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
4891 with = new_name;
4893 do_rewrite:
4895 orig = NULL_TREE;
4896 gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
4897 gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
4899 if (TREE_CODE (*op) == INDIRECT_REF)
4900 orig = REF_ORIGINAL (*op);
4901 if (!orig)
4902 orig = unshare_and_remove_ssa_names (*op);
4904 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
4906 /* Record the original reference, for purposes of alias analysis. */
4907 REF_ORIGINAL (*op) = orig;
4909 /* Virtual operands in the original statement may have to be renamed
4910 because of the replacement. */
4911 mark_new_vars_to_rename (bsi_stmt (*bsi));
4914 /* Rewrites USE (address that is an iv) using candidate CAND. */
4916 static void
4917 rewrite_use_address (struct ivopts_data *data,
4918 struct iv_use *use, struct iv_cand *cand)
4920 tree comp = unshare_expr (get_computation (data->current_loop,
4921 use, cand));
4922 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4923 tree stmts;
4924 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
4926 if (stmts)
4927 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4929 rewrite_address_base (&bsi, use->op_p, op);
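/* Editorial example (not in the original source; names are hypothetical):
   an address use such as the store

     a[i_1] = 0;

   may be rewritten as *p_7 = 0, where p_7 is computed from the selected
   candidate; rewrite_address_base above also records the original reference
   in REF_ORIGINAL so that alias analysis still sees what was accessed.  */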
4932 /* Rewrites USE (a condition in which one of the operands is an iv) using
4933 candidate CAND. */
4935 static void
4936 rewrite_use_compare (struct ivopts_data *data,
4937 struct iv_use *use, struct iv_cand *cand)
4939 tree comp;
4940 tree *op_p, cond, op, stmts, bound;
4941 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4942 enum tree_code compare;
4943 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
4945 bound = cp->value;
4946 if (bound)
4948 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
4949 tree var_type = TREE_TYPE (var);
4951 compare = iv_elimination_compare (data, use);
4952 bound = fold_convert (var_type, bound);
4953 op = force_gimple_operand (unshare_expr (bound), &stmts,
4954 true, NULL_TREE);
4956 if (stmts)
4957 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4959 *use->op_p = build2 (compare, boolean_type_node, var, op);
4960 update_stmt (use->stmt);
4961 return;
4964 /* The induction variable elimination failed; just express the original
4965 giv. */
4966 comp = unshare_expr (get_computation (data->current_loop, use, cand));
4968 cond = *use->op_p;
4969 op_p = &TREE_OPERAND (cond, 0);
4970 if (TREE_CODE (*op_p) != SSA_NAME
4971 || zero_p (get_iv (data, *op_p)->step))
4972 op_p = &TREE_OPERAND (cond, 1);
4974 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
4975 if (stmts)
4976 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4978 *op_p = op;
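/* Editorial example (not in the original source; names are hypothetical):
   if the candidate ivtmp has base 0 and step 4 and the loop originally
   exits on

     if (i_1 < 100)

   a successful elimination might replace the condition with something like

     if (ivtmp_20 != 400)

   using the bound stored in the cost pair; otherwise the original iv value
   is just re-expressed in terms of the candidate, as done above.  */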
4981 /* Ensure that operand *OP_P may be used at the end of EXIT without
4982 violating loop closed ssa form. */
4984 static void
4985 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
4987 basic_block def_bb;
4988 struct loop *def_loop;
4989 tree phi, use;
4991 use = USE_FROM_PTR (op_p);
4992 if (TREE_CODE (use) != SSA_NAME)
4993 return;
4995 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
4996 if (!def_bb)
4997 return;
4999 def_loop = def_bb->loop_father;
5000 if (flow_bb_inside_loop_p (def_loop, exit->dest))
5001 return;
5003 /* Try finding a phi node that copies the value out of the loop. */
5004 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5005 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5006 break;
5008 if (!phi)
5010 /* Create such a phi node. */
5011 tree new_name = duplicate_ssa_name (use, NULL);
5013 phi = create_phi_node (new_name, exit->dest);
5014 SSA_NAME_DEF_STMT (new_name) = phi;
5015 add_phi_arg (phi, use, exit);
5018 SET_USE (op_p, PHI_RESULT (phi));
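/* Editorial note (not in the original source): loop closed ssa form requires
   that a name defined inside a loop is used outside of it only through a phi
   node on the exit block, e.g.

     in the loop:     sum_5 = sum_3 + x_4;
     on the exit:     sum_9 = PHI <sum_5 (loop)>
     after the loop:  only sum_9 is used, never sum_5 directly.

   The function above creates such an exit phi when one is missing.  */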
5021 /* Ensure that operands of STMT may be used at the end of EXIT without
5022 violating loop closed ssa form. */
5024 static void
5025 protect_loop_closed_ssa_form (edge exit, tree stmt)
5027 use_optype uses;
5028 vuse_optype vuses;
5029 v_may_def_optype v_may_defs;
5030 unsigned i;
5032 uses = STMT_USE_OPS (stmt);
5033 for (i = 0; i < NUM_USES (uses); i++)
5034 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
5036 vuses = STMT_VUSE_OPS (stmt);
5037 for (i = 0; i < NUM_VUSES (vuses); i++)
5038 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
5040 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5041 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
5042 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
5045 /* STMTS compute the value of the phi argument OP on the exit EXIT of a loop.
5046 Arrange things so that they are emitted in the correct place, and so that
5047 loop closed ssa form is preserved. */
5049 static void
5050 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5052 tree_stmt_iterator tsi;
5053 block_stmt_iterator bsi;
5054 tree phi, stmt, def, next;
5056 if (!single_pred_p (exit->dest))
5057 split_loop_exit_edge (exit);
5059 /* Ensure there is a label in exit->dest, so that we can
5060 insert after it. */
5061 tree_block_label (exit->dest);
5062 bsi = bsi_after_labels (exit->dest);
5064 if (TREE_CODE (stmts) == STATEMENT_LIST)
5066 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5068 bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
5069 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5072 else
5074 bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
5075 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5078 if (!op)
5079 return;
5081 for (phi = phi_nodes (exit->dest); phi; phi = next)
5083 next = PHI_CHAIN (phi);
5085 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5087 def = PHI_RESULT (phi);
5088 remove_statement (phi, false);
5089 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5090 def, op);
5091 SSA_NAME_DEF_STMT (def) = stmt;
5092 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5097 /* Rewrites the final value of USE (that is only needed outside of the loop)
5098 using candidate CAND. */
5100 static void
5101 rewrite_use_outer (struct ivopts_data *data,
5102 struct iv_use *use, struct iv_cand *cand)
5104 edge exit;
5105 tree value, op, stmts, tgt;
5106 tree phi;
5108 switch (TREE_CODE (use->stmt))
5110 case PHI_NODE:
5111 tgt = PHI_RESULT (use->stmt);
5112 break;
5113 case MODIFY_EXPR:
5114 tgt = TREE_OPERAND (use->stmt, 0);
5115 break;
5116 default:
5117 gcc_unreachable ();
5120 exit = single_dom_exit (data->current_loop);
5122 if (exit)
5124 if (!cand->iv)
5126 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5127 value = cp->value;
5129 else
5130 value = get_computation_at (data->current_loop,
5131 use, cand, last_stmt (exit->src));
5133 value = unshare_expr (value);
5134 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5136 /* If we will preserve the iv anyway and we would need to perform
5137 some computation to replace the final value, do nothing. */
5138 if (stmts && name_info (data, tgt)->preserve_biv)
5139 return;
5141 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5143 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5145 if (USE_FROM_PTR (use_p) == tgt)
5146 SET_USE (use_p, op);
5149 if (stmts)
5150 compute_phi_arg_on_exit (exit, stmts, op);
5152 /* Enable removal of the statement. We cannot remove it directly,
5153 since we may still need the aliasing information attached to the
5154 ssa name defined by it. */
5155 name_info (data, tgt)->iv->have_use_for = false;
5156 return;
5159 /* If the variable is going to be preserved anyway, there is nothing to
5160 do. */
5161 if (name_info (data, tgt)->preserve_biv)
5162 return;
5164 /* Otherwise we just need to compute the iv. */
5165 rewrite_use_nonlinear_expr (data, use, cand);
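/* Editorial example (not in the original source): for a loop like

     for (i_1 = 0; i_1 < 100; i_1++)
       ...

   followed by a use of i after the loop, the outer use can be rewritten to
   the final value 100 (any needed computation is emitted on the exit edge by
   compute_phi_arg_on_exit), so the iv itself may die inside the loop.  */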
5168 /* Rewrites USE using candidate CAND. */
5170 static void
5171 rewrite_use (struct ivopts_data *data,
5172 struct iv_use *use, struct iv_cand *cand)
5174 switch (use->type)
5176 case USE_NONLINEAR_EXPR:
5177 rewrite_use_nonlinear_expr (data, use, cand);
5178 break;
5180 case USE_OUTER:
5181 rewrite_use_outer (data, use, cand);
5182 break;
5184 case USE_ADDRESS:
5185 rewrite_use_address (data, use, cand);
5186 break;
5188 case USE_COMPARE:
5189 rewrite_use_compare (data, use, cand);
5190 break;
5192 default:
5193 gcc_unreachable ();
5195 update_stmt (use->stmt);
5198 /* Rewrite the uses using the selected induction variables. */
5200 static void
5201 rewrite_uses (struct ivopts_data *data)
5203 unsigned i;
5204 struct iv_cand *cand;
5205 struct iv_use *use;
5207 for (i = 0; i < n_iv_uses (data); i++)
5209 use = iv_use (data, i);
5210 cand = use->selected;
5211 gcc_assert (cand);
5213 rewrite_use (data, use, cand);
5217 /* Removes the ivs that are not used after rewriting. */
5219 static void
5220 remove_unused_ivs (struct ivopts_data *data)
5222 unsigned j;
5223 bitmap_iterator bi;
5225 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5227 struct version_info *info;
5229 info = ver_info (data, j);
5230 if (info->iv
5231 && !zero_p (info->iv->step)
5232 && !info->inv_id
5233 && !info->iv->have_use_for
5234 && !info->preserve_biv)
5235 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5239 /* Frees data allocated by the optimization of a single loop. */
5241 static void
5242 free_loop_data (struct ivopts_data *data)
5244 unsigned i, j;
5245 bitmap_iterator bi;
5246 tree obj;
5248 htab_empty (data->niters);
5250 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5252 struct version_info *info;
5254 info = ver_info (data, i);
5255 if (info->iv)
5256 free (info->iv);
5257 info->iv = NULL;
5258 info->has_nonlin_use = false;
5259 info->preserve_biv = false;
5260 info->inv_id = 0;
5262 bitmap_clear (data->relevant);
5263 bitmap_clear (data->important_candidates);
5265 for (i = 0; i < n_iv_uses (data); i++)
5267 struct iv_use *use = iv_use (data, i);
5269 free (use->iv);
5270 BITMAP_FREE (use->related_cands);
5271 for (j = 0; j < use->n_map_members; j++)
5272 if (use->cost_map[j].depends_on)
5273 BITMAP_FREE (use->cost_map[j].depends_on);
5274 free (use->cost_map);
5275 free (use);
5277 VEC_truncate (iv_use_p, data->iv_uses, 0);
5279 for (i = 0; i < n_iv_cands (data); i++)
5281 struct iv_cand *cand = iv_cand (data, i);
5283 if (cand->iv)
5284 free (cand->iv);
5285 free (cand);
5287 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5289 if (data->version_info_size < num_ssa_names)
5291 data->version_info_size = 2 * num_ssa_names;
5292 free (data->version_info);
5293 data->version_info = xcalloc (data->version_info_size,
5294 sizeof (struct version_info));
5297 data->max_inv_id = 0;
5299 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5300 SET_DECL_RTL (obj, NULL_RTX);
5302 VEC_truncate (tree, decl_rtl_to_reset, 0);
5305 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5306 loop tree. */
5308 static void
5309 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5311 unsigned i;
5313 for (i = 1; i < loops->num; i++)
5314 if (loops->parray[i])
5316 free (loops->parray[i]->aux);
5317 loops->parray[i]->aux = NULL;
5320 free_loop_data (data);
5321 free (data->version_info);
5322 BITMAP_FREE (data->relevant);
5323 BITMAP_FREE (data->important_candidates);
5324 htab_delete (data->niters);
5326 VEC_free (tree, heap, decl_rtl_to_reset);
5327 VEC_free (iv_use_p, heap, data->iv_uses);
5328 VEC_free (iv_cand_p, heap, data->iv_candidates);
5331 /* Optimizes the LOOP. Returns true if anything changed. */
5333 static bool
5334 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5336 bool changed = false;
5337 struct iv_ca *iv_ca;
5338 edge exit;
5340 data->current_loop = loop;
5342 if (dump_file && (dump_flags & TDF_DETAILS))
5344 fprintf (dump_file, "Processing loop %d\n", loop->num);
5346 exit = single_dom_exit (loop);
5347 if (exit)
5349 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5350 exit->src->index, exit->dest->index);
5351 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5352 fprintf (dump_file, "\n");
5355 fprintf (dump_file, "\n");
5358 /* For each ssa name, determine whether it behaves as an induction variable
5359 in some loop. */
5360 if (!find_induction_variables (data))
5361 goto finish;
5363 /* Finds interesting uses (item 1). */
5364 find_interesting_uses (data);
5365 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5366 goto finish;
5368 /* Finds candidates for the induction variables (item 2). */
5369 find_iv_candidates (data);
5371 /* Calculates the costs (item 3, part 1). */
5372 determine_use_iv_costs (data);
5373 determine_iv_costs (data);
5374 determine_set_costs (data);
5376 /* Find the optimal set of induction variables (item 3, part 2). */
5377 iv_ca = find_optimal_iv_set (data);
5378 if (!iv_ca)
5379 goto finish;
5380 changed = true;
5382 /* Create the new induction variables (item 4, part 1). */
5383 create_new_ivs (data, iv_ca);
5384 iv_ca_free (&iv_ca);
5386 /* Rewrite the uses (item 4, part 2). */
5387 rewrite_uses (data);
5389 /* Remove the ivs that are unused after rewriting. */
5390 remove_unused_ivs (data);
5392 /* We have changed the structure of induction variables; it might happen
5393 that definitions in the scev database refer to some of them that were
5394 eliminated. */
5395 scev_reset ();
5397 finish:
5398 free_loop_data (data);
5400 return changed;
5403 /* Main entry point. Optimizes induction variables in LOOPS. */
5405 void
5406 tree_ssa_iv_optimize (struct loops *loops)
5408 struct loop *loop;
5409 struct ivopts_data data;
5411 tree_ssa_iv_optimize_init (loops, &data);
5413 /* Optimize the loops starting with the innermost ones. */
5414 loop = loops->tree_root;
5415 while (loop->inner)
5416 loop = loop->inner;
5418 /* Scan the loops, inner ones first. */
5419 while (loop != loops->tree_root)
5421 if (dump_file && (dump_flags & TDF_DETAILS))
5422 flow_loop_dump (loop, dump_file, NULL, 1);
5424 tree_ssa_iv_optimize_loop (&data, loop);
5426 if (loop->next)
5428 loop = loop->next;
5429 while (loop->inner)
5430 loop = loop->inner;
5432 else
5433 loop = loop->outer;
5436 /* FIXME. IV opts introduces new aliases and call-clobbered
5437 variables, which need to be renamed. However, when we call the
5438 renamer, not all statements will be scanned for operands. In
5439 particular, the newly introduced aliases may appear in statements
5440 that are considered "unmodified", so the renamer will not get a
5441 chance to rename those operands.
5443 Work around this problem by forcing an operand re-scan on every
5444 statement. This will not be necessary once the new operand
5445 scanner is implemented. */
5446 if (need_ssa_update_p ())
5448 basic_block bb;
5449 block_stmt_iterator si;
5450 FOR_EACH_BB (bb)
5451 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5452 update_stmt (bsi_stmt (si));
5455 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
5456 tree_ssa_iv_optimize_finalize (loops, &data);
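/* Editorial note (not in the original source): the traversal above visits a
   loop nest such as

     loop 1
       loop 2
         loop 3
       loop 4

   in the order 3, 2, 4, 1, i.e. every loop is optimized before any loop that
   contains it.  */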