2005-04-05 Andrew MacLeod <amacleod@redhat.com>
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blobd1bdb7add8eda214e6e3aaaff8b73439284680b1
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
91 #include "langhooks.h"
93 /* The infinite cost. */
94 #define INFTY 10000000
96 /* The expected number of loop iterations. TODO -- use profiling instead of
97 this. */
98 #define AVG_LOOP_NITER(LOOP) 5
101 /* Representation of the induction variable. */
102 struct iv
104 tree base; /* Initial value of the iv. */
105 tree base_object; /* A memory object to that the induction variable points. */
106 tree step; /* Step of the iv (constant only). */
107 tree ssa_name; /* The ssa name with the value. */
108 bool biv_p; /* Is it a biv? */
109 bool have_use_for; /* Do we already have a use for it? */
110 unsigned use_id; /* The identifier in the use if it is the case. */
113 /* Per-ssa version information (induction variable descriptions, etc.). */
114 struct version_info
116 tree name; /* The ssa name. */
117 struct iv *iv; /* Induction variable description. */
118 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id; /* Id of an invariant. */
121 bool preserve_biv; /* For the original biv, whether to preserve it. */
124 /* Information attached to loop. */
125 struct loop_data
127 unsigned regs_used; /* Number of registers used. */
130 /* Types of uses. */
131 enum use_type
133 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
134 USE_OUTER, /* The induction variable is used outside the loop. */
135 USE_ADDRESS, /* Use in an address. */
136 USE_COMPARE /* Use is a compare. */
139 /* The candidate - cost pair. */
140 struct cost_pair
142 struct iv_cand *cand; /* The candidate. */
143 unsigned cost; /* The cost. */
144 bitmap depends_on; /* The list of invariants that have to be
145 preserved. */
148 /* Use. */
149 struct iv_use
151 unsigned id; /* The id of the use. */
152 enum use_type type; /* Type of the use. */
153 struct iv *iv; /* The induction variable it is based on. */
154 tree stmt; /* Statement in that it occurs. */
155 tree *op_p; /* The place where it occurs. */
156 bitmap related_cands; /* The set of "related" iv candidates, plus the common
157 important ones. */
159 unsigned n_map_members; /* Number of candidates in the cost_map list. */
160 struct cost_pair *cost_map;
161 /* The costs wrto the iv candidates. */
163 struct iv_cand *selected;
164 /* The selected candidate. */
167 /* The position where the iv is computed. */
168 enum iv_position
170 IP_NORMAL, /* At the end, just before the exit condition. */
171 IP_END, /* At the end of the latch block. */
172 IP_ORIGINAL /* The original biv. */
175 /* The induction variable candidate. */
176 struct iv_cand
178 unsigned id; /* The number of the candidate. */
179 bool important; /* Whether this is an "important" candidate, i.e. such
180 that it should be considered by all uses. */
181 enum iv_position pos; /* Where it is computed. */
182 tree incremented_at; /* For original biv, the statement where it is
183 incremented. */
184 tree var_before; /* The variable used for it before increment. */
185 tree var_after; /* The variable used for it after increment. */
186 struct iv *iv; /* The value of the candidate. NULL for
187 "pseudocandidate" used to indicate the possibility
188 to replace the final value of an iv by direct
189 computation of the value. */
190 unsigned cost; /* Cost of the candidate. */
193 /* The data used by the induction variable optimizations. */
195 struct ivopts_data
197 /* The currently optimized loop. */
198 struct loop *current_loop;
200 /* Numbers of iterations for all exits of the current loop. */
201 htab_t niters;
203 /* The size of version_info array allocated. */
204 unsigned version_info_size;
206 /* The array of information for the ssa names. */
207 struct version_info *version_info;
209 /* The bitmap of indices in version_info whose value was changed. */
210 bitmap relevant;
212 /* The maximum invariant id. */
213 unsigned max_inv_id;
215 /* The uses of induction variables. */
216 varray_type iv_uses;
218 /* The candidates. */
219 varray_type iv_candidates;
221 /* A bitmap of important candidates. */
222 bitmap important_candidates;
224 /* Whether to consider just related and important candidates when replacing a
225 use. */
226 bool consider_all_candidates;
229 /* An assignment of iv candidates to uses. */
231 struct iv_ca
233 /* The number of uses covered by the assignment. */
234 unsigned upto;
236 /* Number of uses that cannot be expressed by the candidates in the set. */
237 unsigned bad_uses;
239 /* Candidate assigned to a use, together with the related costs. */
240 struct cost_pair **cand_for_use;
242 /* Number of times each candidate is used. */
243 unsigned *n_cand_uses;
245 /* The candidates used. */
246 bitmap cands;
248 /* The number of candidates in the set. */
249 unsigned n_cands;
251 /* Total number of registers needed. */
252 unsigned n_regs;
254 /* Total cost of expressing uses. */
255 unsigned cand_use_cost;
257 /* Total cost of candidates. */
258 unsigned cand_cost;
260 /* Number of times each invariant is used. */
261 unsigned *n_invariant_uses;
263 /* Total cost of the assignment. */
264 unsigned cost;
267 /* Difference of two iv candidate assignments. */
269 struct iv_ca_delta
271 /* Changed use. */
272 struct iv_use *use;
274 /* An old assignment (for rollback purposes). */
275 struct cost_pair *old_cp;
277 /* A new assignment. */
278 struct cost_pair *new_cp;
280 /* Next change in the list. */
281 struct iv_ca_delta *next_change;
284 /* Bound on number of candidates below that all candidates are considered. */
286 #define CONSIDER_ALL_CANDIDATES_BOUND \
287 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
289 /* If there are more iv occurrences, we just give up (it is quite unlikely that
290 optimizing such a loop would help, and it would take ages). */
292 #define MAX_CONSIDERED_USES \
293 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
295 /* If there are at most this number of ivs in the set, try removing unnecessary
296 ivs from the set always. */
298 #define ALWAYS_PRUNE_CAND_SET_BOUND \
299 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
301 /* The list of trees for that the decl_rtl field must be reset is stored
302 here. */
304 static varray_type decl_rtl_to_reset;
306 /* Number of uses recorded in DATA. */
308 static inline unsigned
309 n_iv_uses (struct ivopts_data *data)
311 return VARRAY_ACTIVE_SIZE (data->iv_uses);
314 /* Ith use recorded in DATA. */
316 static inline struct iv_use *
317 iv_use (struct ivopts_data *data, unsigned i)
319 return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
322 /* Number of candidates recorded in DATA. */
324 static inline unsigned
325 n_iv_cands (struct ivopts_data *data)
327 return VARRAY_ACTIVE_SIZE (data->iv_candidates);
330 /* Ith candidate recorded in DATA. */
332 static inline struct iv_cand *
333 iv_cand (struct ivopts_data *data, unsigned i)
335 return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
338 /* The data for LOOP. */
340 static inline struct loop_data *
341 loop_data (struct loop *loop)
343 return loop->aux;
346 /* The single loop exit if it dominates the latch, NULL otherwise. */
348 static edge
349 single_dom_exit (struct loop *loop)
351 edge exit = loop->single_exit;
353 if (!exit)
354 return NULL;
356 if (!just_once_each_iteration_p (loop, exit->src))
357 return NULL;
359 return exit;
362 /* Dumps information about the induction variable IV to FILE. */
364 extern void dump_iv (FILE *, struct iv *);
365 void
366 dump_iv (FILE *file, struct iv *iv)
368 if (iv->ssa_name)
370 fprintf (file, "ssa name ");
371 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
372 fprintf (file, "\n");
375 fprintf (file, " type ");
376 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
377 fprintf (file, "\n");
379 if (iv->step)
381 fprintf (file, " base ");
382 print_generic_expr (file, iv->base, TDF_SLIM);
383 fprintf (file, "\n");
385 fprintf (file, " step ");
386 print_generic_expr (file, iv->step, TDF_SLIM);
387 fprintf (file, "\n");
389 else
391 fprintf (file, " invariant ");
392 print_generic_expr (file, iv->base, TDF_SLIM);
393 fprintf (file, "\n");
396 if (iv->base_object)
398 fprintf (file, " base object ");
399 print_generic_expr (file, iv->base_object, TDF_SLIM);
400 fprintf (file, "\n");
403 if (iv->biv_p)
404 fprintf (file, " is a biv\n");
407 /* Dumps information about the USE to FILE. */
409 extern void dump_use (FILE *, struct iv_use *);
410 void
411 dump_use (FILE *file, struct iv_use *use)
413 fprintf (file, "use %d\n", use->id);
415 switch (use->type)
417 case USE_NONLINEAR_EXPR:
418 fprintf (file, " generic\n");
419 break;
421 case USE_OUTER:
422 fprintf (file, " outside\n");
423 break;
425 case USE_ADDRESS:
426 fprintf (file, " address\n");
427 break;
429 case USE_COMPARE:
430 fprintf (file, " compare\n");
431 break;
433 default:
434 gcc_unreachable ();
437 fprintf (file, " in statement ");
438 print_generic_expr (file, use->stmt, TDF_SLIM);
439 fprintf (file, "\n");
441 fprintf (file, " at position ");
442 if (use->op_p)
443 print_generic_expr (file, *use->op_p, TDF_SLIM);
444 fprintf (file, "\n");
446 dump_iv (file, use->iv);
448 if (use->related_cands)
450 fprintf (file, " related candidates ");
451 dump_bitmap (file, use->related_cands);
455 /* Dumps information about the uses to FILE. */
457 extern void dump_uses (FILE *, struct ivopts_data *);
458 void
459 dump_uses (FILE *file, struct ivopts_data *data)
461 unsigned i;
462 struct iv_use *use;
464 for (i = 0; i < n_iv_uses (data); i++)
466 use = iv_use (data, i);
468 dump_use (file, use);
469 fprintf (file, "\n");
473 /* Dumps information about induction variable candidate CAND to FILE. */
475 extern void dump_cand (FILE *, struct iv_cand *);
476 void
477 dump_cand (FILE *file, struct iv_cand *cand)
479 struct iv *iv = cand->iv;
481 fprintf (file, "candidate %d%s\n",
482 cand->id, cand->important ? " (important)" : "");
484 if (!iv)
486 fprintf (file, " final value replacement\n");
487 return;
490 switch (cand->pos)
492 case IP_NORMAL:
493 fprintf (file, " incremented before exit test\n");
494 break;
496 case IP_END:
497 fprintf (file, " incremented at end\n");
498 break;
500 case IP_ORIGINAL:
501 fprintf (file, " original biv\n");
502 break;
505 dump_iv (file, iv);
508 /* Returns the info for ssa version VER. */
510 static inline struct version_info *
511 ver_info (struct ivopts_data *data, unsigned ver)
513 return data->version_info + ver;
516 /* Returns the info for ssa name NAME. */
518 static inline struct version_info *
519 name_info (struct ivopts_data *data, tree name)
521 return ver_info (data, SSA_NAME_VERSION (name));
524 /* Checks whether there exists number X such that X * B = A, counting modulo
525 2^BITS. */
527 static bool
528 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
529 HOST_WIDE_INT *x)
531 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
532 unsigned HOST_WIDE_INT inv, ex, val;
533 unsigned i;
535 a &= mask;
536 b &= mask;
538 /* First divide the whole equation by 2 as long as possible. */
539 while (!(a & 1) && !(b & 1))
541 a >>= 1;
542 b >>= 1;
543 bits--;
544 mask >>= 1;
547 if (!(b & 1))
549 /* If b is still even, a is odd and there is no such x. */
550 return false;
553 /* Find the inverse of b. We compute it as
554 b^(2^(bits - 1) - 1) (mod 2^bits). */
555 inv = 1;
556 ex = b;
557 for (i = 0; i < bits - 1; i++)
559 inv = (inv * ex) & mask;
560 ex = (ex * ex) & mask;
563 val = (a * inv) & mask;
565 gcc_assert (((val * b) & mask) == a);
567 if ((val >> (bits - 1)) & 1)
568 val |= ~mask;
570 *x = val;
572 return true;
575 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
576 emitted in LOOP. */
578 static bool
579 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
581 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
583 gcc_assert (bb);
585 if (sbb == loop->latch)
586 return true;
588 if (sbb != bb)
589 return false;
591 return stmt == last_stmt (bb);
594 /* Returns true if STMT if after the place where the original induction
595 variable CAND is incremented. */
597 static bool
598 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
600 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
601 basic_block stmt_bb = bb_for_stmt (stmt);
602 block_stmt_iterator bsi;
604 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
605 return false;
607 if (stmt_bb != cand_bb)
608 return true;
610 /* Scan the block from the end, since the original ivs are usually
611 incremented at the end of the loop body. */
612 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
614 if (bsi_stmt (bsi) == cand->incremented_at)
615 return false;
616 if (bsi_stmt (bsi) == stmt)
617 return true;
621 /* Returns true if STMT if after the place where the induction variable
622 CAND is incremented in LOOP. */
624 static bool
625 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
627 switch (cand->pos)
629 case IP_END:
630 return false;
632 case IP_NORMAL:
633 return stmt_after_ip_normal_pos (loop, stmt);
635 case IP_ORIGINAL:
636 return stmt_after_ip_original_pos (cand, stmt);
638 default:
639 gcc_unreachable ();
643 /* Element of the table in that we cache the numbers of iterations obtained
644 from exits of the loop. */
646 struct nfe_cache_elt
648 /* The edge for that the number of iterations is cached. */
649 edge exit;
651 /* True if the # of iterations was successfully determined. */
652 bool valid_p;
654 /* Description of # of iterations. */
655 struct tree_niter_desc niter;
658 /* Hash function for nfe_cache_elt E. */
660 static hashval_t
661 nfe_hash (const void *e)
663 const struct nfe_cache_elt *elt = e;
665 return htab_hash_pointer (elt->exit);
668 /* Equality function for nfe_cache_elt E1 and edge E2. */
670 static int
671 nfe_eq (const void *e1, const void *e2)
673 const struct nfe_cache_elt *elt1 = e1;
675 return elt1->exit == e2;
678 /* Returns structure describing number of iterations determined from
679 EXIT of DATA->current_loop, or NULL if something goes wrong. */
681 static struct tree_niter_desc *
682 niter_for_exit (struct ivopts_data *data, edge exit)
684 struct nfe_cache_elt *nfe_desc;
685 PTR *slot;
687 slot = htab_find_slot_with_hash (data->niters, exit,
688 htab_hash_pointer (exit),
689 INSERT);
691 if (!*slot)
693 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
694 nfe_desc->exit = exit;
695 nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
696 exit, &nfe_desc->niter);
697 *slot = nfe_desc;
699 else
700 nfe_desc = *slot;
702 if (!nfe_desc->valid_p)
703 return NULL;
705 return &nfe_desc->niter;
708 /* Returns structure describing number of iterations determined from
709 single dominating exit of DATA->current_loop, or NULL if something
710 goes wrong. */
712 static struct tree_niter_desc *
713 niter_for_single_dom_exit (struct ivopts_data *data)
715 edge exit = single_dom_exit (data->current_loop);
717 if (!exit)
718 return NULL;
720 return niter_for_exit (data, exit);
723 /* Initializes data structures used by the iv optimization pass, stored
724 in DATA. LOOPS is the loop tree. */
726 static void
727 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
729 unsigned i;
731 data->version_info_size = 2 * num_ssa_names;
732 data->version_info = xcalloc (data->version_info_size,
733 sizeof (struct version_info));
734 data->relevant = BITMAP_ALLOC (NULL);
735 data->important_candidates = BITMAP_ALLOC (NULL);
736 data->max_inv_id = 0;
737 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
739 for (i = 1; i < loops->num; i++)
740 if (loops->parray[i])
741 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
743 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
744 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
745 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
748 /* Returns a memory object to that EXPR points. In case we are able to
749 determine that it does not point to any such object, NULL is returned. */
751 static tree
752 determine_base_object (tree expr)
754 enum tree_code code = TREE_CODE (expr);
755 tree base, obj, op0, op1;
757 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
758 return NULL_TREE;
760 switch (code)
762 case INTEGER_CST:
763 return NULL_TREE;
765 case ADDR_EXPR:
766 obj = TREE_OPERAND (expr, 0);
767 base = get_base_address (obj);
769 if (!base)
770 return expr;
772 if (TREE_CODE (base) == INDIRECT_REF)
773 return determine_base_object (TREE_OPERAND (base, 0));
775 return fold (build1 (ADDR_EXPR, ptr_type_node, base));
777 case PLUS_EXPR:
778 case MINUS_EXPR:
779 op0 = determine_base_object (TREE_OPERAND (expr, 0));
780 op1 = determine_base_object (TREE_OPERAND (expr, 1));
782 if (!op1)
783 return op0;
785 if (!op0)
786 return (code == PLUS_EXPR
787 ? op1
788 : fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));
790 return fold (build (code, ptr_type_node, op0, op1));
792 case NOP_EXPR:
793 case CONVERT_EXPR:
794 return determine_base_object (TREE_OPERAND (expr, 0));
796 default:
797 return fold_convert (ptr_type_node, expr);
801 /* Allocates an induction variable with given initial value BASE and step STEP
802 for loop LOOP. */
804 static struct iv *
805 alloc_iv (tree base, tree step)
807 struct iv *iv = xcalloc (1, sizeof (struct iv));
809 if (step && integer_zerop (step))
810 step = NULL_TREE;
812 iv->base = base;
813 iv->base_object = determine_base_object (base);
814 iv->step = step;
815 iv->biv_p = false;
816 iv->have_use_for = false;
817 iv->use_id = 0;
818 iv->ssa_name = NULL_TREE;
820 return iv;
823 /* Sets STEP and BASE for induction variable IV. */
825 static void
826 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
828 struct version_info *info = name_info (data, iv);
830 gcc_assert (!info->iv);
832 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
833 info->iv = alloc_iv (base, step);
834 info->iv->ssa_name = iv;
837 /* Finds induction variable declaration for VAR. */
839 static struct iv *
840 get_iv (struct ivopts_data *data, tree var)
842 basic_block bb;
844 if (!name_info (data, var)->iv)
846 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
848 if (!bb
849 || !flow_bb_inside_loop_p (data->current_loop, bb))
850 set_iv (data, var, var, NULL_TREE);
853 return name_info (data, var)->iv;
856 /* Determines the step of a biv defined in PHI. */
858 static tree
859 determine_biv_step (tree phi)
861 struct loop *loop = bb_for_stmt (phi)->loop_father;
862 tree name = PHI_RESULT (phi), base, step;
863 tree type = TREE_TYPE (name);
865 if (!is_gimple_reg (name))
866 return NULL_TREE;
868 if (!simple_iv (loop, phi, name, &base, &step))
869 return NULL_TREE;
871 if (!step)
872 return build_int_cst (type, 0);
874 return step;
877 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
879 static bool
880 abnormal_ssa_name_p (tree exp)
882 if (!exp)
883 return false;
885 if (TREE_CODE (exp) != SSA_NAME)
886 return false;
888 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
891 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
892 abnormal phi node. Callback for for_each_index. */
894 static bool
895 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
896 void *data ATTRIBUTE_UNUSED)
898 if (TREE_CODE (base) == ARRAY_REF)
900 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
901 return false;
902 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
903 return false;
906 return !abnormal_ssa_name_p (*index);
909 /* Returns true if EXPR contains a ssa name that occurs in an
910 abnormal phi node. */
912 static bool
913 contains_abnormal_ssa_name_p (tree expr)
915 enum tree_code code = TREE_CODE (expr);
916 enum tree_code_class class = TREE_CODE_CLASS (code);
918 if (code == SSA_NAME)
919 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
921 if (code == INTEGER_CST
922 || is_gimple_min_invariant (expr))
923 return false;
925 if (code == ADDR_EXPR)
926 return !for_each_index (&TREE_OPERAND (expr, 0),
927 idx_contains_abnormal_ssa_name_p,
928 NULL);
930 switch (class)
932 case tcc_binary:
933 case tcc_comparison:
934 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
935 return true;
937 /* Fallthru. */
938 case tcc_unary:
939 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
940 return true;
942 break;
944 default:
945 gcc_unreachable ();
948 return false;
951 /* Finds basic ivs. */
953 static bool
954 find_bivs (struct ivopts_data *data)
956 tree phi, step, type, base;
957 bool found = false;
958 struct loop *loop = data->current_loop;
960 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
962 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
963 continue;
965 step = determine_biv_step (phi);
967 if (!step)
968 continue;
969 if (cst_and_fits_in_hwi (step)
970 && int_cst_value (step) == 0)
971 continue;
973 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
974 if (contains_abnormal_ssa_name_p (base))
975 continue;
977 type = TREE_TYPE (PHI_RESULT (phi));
978 base = fold_convert (type, base);
979 step = fold_convert (type, step);
981 /* FIXME: We do not handle induction variables whose step does
982 not satisfy cst_and_fits_in_hwi. */
983 if (!cst_and_fits_in_hwi (step))
984 continue;
986 set_iv (data, PHI_RESULT (phi), base, step);
987 found = true;
990 return found;
993 /* Marks basic ivs. */
995 static void
996 mark_bivs (struct ivopts_data *data)
998 tree phi, var;
999 struct iv *iv, *incr_iv;
1000 struct loop *loop = data->current_loop;
1001 basic_block incr_bb;
1003 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1005 iv = get_iv (data, PHI_RESULT (phi));
1006 if (!iv)
1007 continue;
1009 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1010 incr_iv = get_iv (data, var);
1011 if (!incr_iv)
1012 continue;
1014 /* If the increment is in the subloop, ignore it. */
1015 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1016 if (incr_bb->loop_father != data->current_loop
1017 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1018 continue;
1020 iv->biv_p = true;
1021 incr_iv->biv_p = true;
1025 /* Checks whether STMT defines a linear induction variable and stores its
1026 parameters to BASE and STEP. */
1028 static bool
1029 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1030 tree *base, tree *step)
1032 tree lhs;
1033 struct loop *loop = data->current_loop;
1035 *base = NULL_TREE;
1036 *step = NULL_TREE;
1038 if (TREE_CODE (stmt) != MODIFY_EXPR)
1039 return false;
1041 lhs = TREE_OPERAND (stmt, 0);
1042 if (TREE_CODE (lhs) != SSA_NAME)
1043 return false;
1045 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
1046 return false;
1048 /* FIXME: We do not handle induction variables whose step does
1049 not satisfy cst_and_fits_in_hwi. */
1050 if (!zero_p (*step)
1051 && !cst_and_fits_in_hwi (*step))
1052 return false;
1054 if (contains_abnormal_ssa_name_p (*base))
1055 return false;
1057 return true;
1060 /* Finds general ivs in statement STMT. */
1062 static void
1063 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1065 tree base, step;
1067 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1068 return;
1070 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1073 /* Finds general ivs in basic block BB. */
1075 static void
1076 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1078 block_stmt_iterator bsi;
1080 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1081 find_givs_in_stmt (data, bsi_stmt (bsi));
1084 /* Finds general ivs. */
1086 static void
1087 find_givs (struct ivopts_data *data)
1089 struct loop *loop = data->current_loop;
1090 basic_block *body = get_loop_body_in_dom_order (loop);
1091 unsigned i;
1093 for (i = 0; i < loop->num_nodes; i++)
1094 find_givs_in_bb (data, body[i]);
1095 free (body);
1098 /* For each ssa name defined in LOOP determines whether it is an induction
1099 variable and if so, its initial value and step. */
1101 static bool
1102 find_induction_variables (struct ivopts_data *data)
1104 unsigned i;
1105 bitmap_iterator bi;
1107 if (!find_bivs (data))
1108 return false;
1110 find_givs (data);
1111 mark_bivs (data);
1113 if (dump_file && (dump_flags & TDF_DETAILS))
1115 struct tree_niter_desc *niter;
1117 niter = niter_for_single_dom_exit (data);
1119 if (niter)
1121 fprintf (dump_file, " number of iterations ");
1122 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1123 fprintf (dump_file, "\n");
1125 fprintf (dump_file, " may be zero if ");
1126 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1127 fprintf (dump_file, "\n");
1128 fprintf (dump_file, "\n");
1131 fprintf (dump_file, "Induction variables:\n\n");
1133 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1135 if (ver_info (data, i)->iv)
1136 dump_iv (dump_file, ver_info (data, i)->iv);
1140 return true;
1143 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1145 static struct iv_use *
1146 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1147 tree stmt, enum use_type use_type)
1149 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1151 use->id = n_iv_uses (data);
1152 use->type = use_type;
1153 use->iv = iv;
1154 use->stmt = stmt;
1155 use->op_p = use_p;
1156 use->related_cands = BITMAP_ALLOC (NULL);
1158 /* To avoid showing ssa name in the dumps, if it was not reset by the
1159 caller. */
1160 iv->ssa_name = NULL_TREE;
1162 if (dump_file && (dump_flags & TDF_DETAILS))
1163 dump_use (dump_file, use);
1165 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
1167 return use;
1170 /* Checks whether OP is a loop-level invariant and if so, records it.
1171 NONLINEAR_USE is true if the invariant is used in a way we do not
1172 handle specially. */
1174 static void
1175 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1177 basic_block bb;
1178 struct version_info *info;
1180 if (TREE_CODE (op) != SSA_NAME
1181 || !is_gimple_reg (op))
1182 return;
1184 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1185 if (bb
1186 && flow_bb_inside_loop_p (data->current_loop, bb))
1187 return;
1189 info = name_info (data, op);
1190 info->name = op;
1191 info->has_nonlin_use |= nonlinear_use;
1192 if (!info->inv_id)
1193 info->inv_id = ++data->max_inv_id;
1194 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1197 /* Checks whether the use OP is interesting and if so, records it
1198 as TYPE. */
1200 static struct iv_use *
1201 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1202 enum use_type type)
1204 struct iv *iv;
1205 struct iv *civ;
1206 tree stmt;
1207 struct iv_use *use;
1209 if (TREE_CODE (op) != SSA_NAME)
1210 return NULL;
1212 iv = get_iv (data, op);
1213 if (!iv)
1214 return NULL;
1216 if (iv->have_use_for)
1218 use = iv_use (data, iv->use_id);
1220 gcc_assert (use->type == USE_NONLINEAR_EXPR
1221 || use->type == USE_OUTER);
1223 if (type == USE_NONLINEAR_EXPR)
1224 use->type = USE_NONLINEAR_EXPR;
1225 return use;
1228 if (zero_p (iv->step))
1230 record_invariant (data, op, true);
1231 return NULL;
1233 iv->have_use_for = true;
1235 civ = xmalloc (sizeof (struct iv));
1236 *civ = *iv;
1238 stmt = SSA_NAME_DEF_STMT (op);
1239 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1240 || TREE_CODE (stmt) == MODIFY_EXPR);
1242 use = record_use (data, NULL, civ, stmt, type);
1243 iv->use_id = use->id;
1245 return use;
1248 /* Checks whether the use OP is interesting and if so, records it. */
1250 static struct iv_use *
1251 find_interesting_uses_op (struct ivopts_data *data, tree op)
1253 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1256 /* Records a definition of induction variable OP that is used outside of the
1257 loop. */
1259 static struct iv_use *
1260 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1262 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1265 /* Checks whether the condition *COND_P in STMT is interesting
1266 and if so, records it. */
1268 static void
1269 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1271 tree *op0_p;
1272 tree *op1_p;
1273 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1274 struct iv const_iv;
1275 tree zero = integer_zero_node;
1277 const_iv.step = NULL_TREE;
1279 if (integer_zerop (*cond_p)
1280 || integer_nonzerop (*cond_p))
1281 return;
1283 if (TREE_CODE (*cond_p) == SSA_NAME)
1285 op0_p = cond_p;
1286 op1_p = &zero;
1288 else
1290 op0_p = &TREE_OPERAND (*cond_p, 0);
1291 op1_p = &TREE_OPERAND (*cond_p, 1);
1294 if (TREE_CODE (*op0_p) == SSA_NAME)
1295 iv0 = get_iv (data, *op0_p);
1296 else
1297 iv0 = &const_iv;
1299 if (TREE_CODE (*op1_p) == SSA_NAME)
1300 iv1 = get_iv (data, *op1_p);
1301 else
1302 iv1 = &const_iv;
1304 if (/* When comparing with non-invariant value, we may not do any senseful
1305 induction variable elimination. */
1306 (!iv0 || !iv1)
1307 /* Eliminating condition based on two ivs would be nontrivial.
1308 ??? TODO -- it is not really important to handle this case. */
1309 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1311 find_interesting_uses_op (data, *op0_p);
1312 find_interesting_uses_op (data, *op1_p);
1313 return;
1316 if (zero_p (iv0->step) && zero_p (iv1->step))
1318 /* If both are invariants, this is a work for unswitching. */
1319 return;
1322 civ = xmalloc (sizeof (struct iv));
1323 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1324 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1327 /* Returns true if expression EXPR is obviously invariant in LOOP,
1328 i.e. if all its operands are defined outside of the LOOP. */
1330 bool
1331 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1333 basic_block def_bb;
1334 unsigned i, len;
1336 if (is_gimple_min_invariant (expr))
1337 return true;
1339 if (TREE_CODE (expr) == SSA_NAME)
1341 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1342 if (def_bb
1343 && flow_bb_inside_loop_p (loop, def_bb))
1344 return false;
1346 return true;
1349 if (!EXPR_P (expr))
1350 return false;
1352 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1353 for (i = 0; i < len; i++)
1354 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1355 return false;
1357 return true;
1360 /* Cumulates the steps of indices into DATA and replaces their values with the
1361 initial ones. Returns false when the value of the index cannot be determined.
1362 Callback for for_each_index. */
1364 struct ifs_ivopts_data
1366 struct ivopts_data *ivopts_data;
1367 tree stmt;
1368 tree *step_p;
1371 static bool
1372 idx_find_step (tree base, tree *idx, void *data)
1374 struct ifs_ivopts_data *dta = data;
1375 struct iv *iv;
1376 tree step, type, iv_type, iv_step, lbound, off;
1377 struct loop *loop = dta->ivopts_data->current_loop;
1379 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1380 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1381 return false;
1383 /* If base is a component ref, require that the offset of the reference
1384 be invariant. */
1385 if (TREE_CODE (base) == COMPONENT_REF)
1387 off = component_ref_field_offset (base);
1388 return expr_invariant_in_loop_p (loop, off);
1391 /* If base is array, first check whether we will be able to move the
1392 reference out of the loop (in order to take its address in strength
1393 reduction). In order for this to work we need both lower bound
1394 and step to be loop invariants. */
1395 if (TREE_CODE (base) == ARRAY_REF)
1397 step = array_ref_element_size (base);
1398 lbound = array_ref_low_bound (base);
1400 if (!expr_invariant_in_loop_p (loop, step)
1401 || !expr_invariant_in_loop_p (loop, lbound))
1402 return false;
1405 if (TREE_CODE (*idx) != SSA_NAME)
1406 return true;
1408 iv = get_iv (dta->ivopts_data, *idx);
1409 if (!iv)
1410 return false;
1412 *idx = iv->base;
1414 if (!iv->step)
1415 return true;
1417 iv_type = TREE_TYPE (iv->base);
1418 type = build_pointer_type (TREE_TYPE (base));
1419 if (TREE_CODE (base) == ARRAY_REF)
1421 step = array_ref_element_size (base);
1423 /* We only handle addresses whose step is an integer constant. */
1424 if (TREE_CODE (step) != INTEGER_CST)
1425 return false;
1427 else
1428 /* The step for pointer arithmetics already is 1 byte. */
1429 step = build_int_cst (type, 1);
1431 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1432 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1433 type, iv->base, iv->step, dta->stmt);
1434 else
1435 iv_step = fold_convert (iv_type, iv->step);
1437 if (!iv_step)
1439 /* The index might wrap. */
1440 return false;
1443 step = fold_binary_to_constant (MULT_EXPR, type, step, iv_step);
1445 if (!*dta->step_p)
1446 *dta->step_p = step;
1447 else
1448 *dta->step_p = fold_binary_to_constant (PLUS_EXPR, type,
1449 *dta->step_p, step);
1451 return true;
1454 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1455 object is passed to it in DATA. */
1457 static bool
1458 idx_record_use (tree base, tree *idx,
1459 void *data)
1461 find_interesting_uses_op (data, *idx);
1462 if (TREE_CODE (base) == ARRAY_REF)
1464 find_interesting_uses_op (data, array_ref_element_size (base));
1465 find_interesting_uses_op (data, array_ref_low_bound (base));
1467 return true;
1470 /* Returns true if memory reference REF may be unaligned. */
1472 static bool
1473 may_be_unaligned_p (tree ref)
1475 tree base;
1476 tree base_type;
1477 HOST_WIDE_INT bitsize;
1478 HOST_WIDE_INT bitpos;
1479 tree toffset;
1480 enum machine_mode mode;
1481 int unsignedp, volatilep;
1482 unsigned base_align;
1484 /* The test below is basically copy of what expr.c:normal_inner_ref
1485 does to check whether the object must be loaded by parts when
1486 STRICT_ALIGNMENT is true. */
1487 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1488 &unsignedp, &volatilep, true);
1489 base_type = TREE_TYPE (base);
1490 base_align = TYPE_ALIGN (base_type);
1492 if (mode != BLKmode
1493 && (base_align < GET_MODE_ALIGNMENT (mode)
1494 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1495 || bitpos % BITS_PER_UNIT != 0))
1496 return true;
1498 return false;
1501 /* Finds addresses in *OP_P inside STMT. */
1503 static void
1504 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1506 tree base = unshare_expr (*op_p), step = NULL;
1507 struct iv *civ;
1508 struct ifs_ivopts_data ifs_ivopts_data;
1510 /* Ignore bitfields for now. Not really something terribly complicated
1511 to handle. TODO. */
1512 if (TREE_CODE (base) == COMPONENT_REF
1513 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1514 goto fail;
1516 if (STRICT_ALIGNMENT
1517 && may_be_unaligned_p (base))
1518 goto fail;
1520 ifs_ivopts_data.ivopts_data = data;
1521 ifs_ivopts_data.stmt = stmt;
1522 ifs_ivopts_data.step_p = &step;
1523 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1524 || zero_p (step))
1525 goto fail;
1527 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1528 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1530 if (TREE_CODE (base) == INDIRECT_REF)
1531 base = TREE_OPERAND (base, 0);
1532 else
1533 base = build_addr (base);
1535 civ = alloc_iv (base, step);
1536 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1537 return;
1539 fail:
1540 for_each_index (op_p, idx_record_use, data);
1543 /* Finds and records invariants used in STMT. */
1545 static void
1546 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1548 use_optype uses = NULL;
1549 unsigned i, n;
1550 tree op;
1552 if (TREE_CODE (stmt) == PHI_NODE)
1553 n = PHI_NUM_ARGS (stmt);
1554 else
1556 get_stmt_operands (stmt);
1557 uses = STMT_USE_OPS (stmt);
1558 n = NUM_USES (uses);
1561 for (i = 0; i < n; i++)
1563 if (TREE_CODE (stmt) == PHI_NODE)
1564 op = PHI_ARG_DEF (stmt, i);
1565 else
1566 op = USE_OP (uses, i);
1568 record_invariant (data, op, false);
1572 /* Finds interesting uses of induction variables in the statement STMT. */
1574 static void
1575 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1577 struct iv *iv;
1578 tree op, lhs, rhs;
1579 use_optype uses = NULL;
1580 unsigned i, n;
1582 find_invariants_stmt (data, stmt);
1584 if (TREE_CODE (stmt) == COND_EXPR)
1586 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1587 return;
1590 if (TREE_CODE (stmt) == MODIFY_EXPR)
1592 lhs = TREE_OPERAND (stmt, 0);
1593 rhs = TREE_OPERAND (stmt, 1);
1595 if (TREE_CODE (lhs) == SSA_NAME)
1597 /* If the statement defines an induction variable, the uses are not
1598 interesting by themselves. */
1600 iv = get_iv (data, lhs);
1602 if (iv && !zero_p (iv->step))
1603 return;
1606 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1608 case tcc_comparison:
1609 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1610 return;
1612 case tcc_reference:
1613 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1614 if (REFERENCE_CLASS_P (lhs))
1615 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1616 return;
1618 default: ;
1621 if (REFERENCE_CLASS_P (lhs)
1622 && is_gimple_val (rhs))
1624 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1625 find_interesting_uses_op (data, rhs);
1626 return;
1629 /* TODO -- we should also handle address uses of type
1631 memory = call (whatever);
1635 call (memory). */
1638 if (TREE_CODE (stmt) == PHI_NODE
1639 && bb_for_stmt (stmt) == data->current_loop->header)
1641 lhs = PHI_RESULT (stmt);
1642 iv = get_iv (data, lhs);
1644 if (iv && !zero_p (iv->step))
1645 return;
1648 if (TREE_CODE (stmt) == PHI_NODE)
1649 n = PHI_NUM_ARGS (stmt);
1650 else
1652 uses = STMT_USE_OPS (stmt);
1653 n = NUM_USES (uses);
1656 for (i = 0; i < n; i++)
1658 if (TREE_CODE (stmt) == PHI_NODE)
1659 op = PHI_ARG_DEF (stmt, i);
1660 else
1661 op = USE_OP (uses, i);
1663 if (TREE_CODE (op) != SSA_NAME)
1664 continue;
1666 iv = get_iv (data, op);
1667 if (!iv)
1668 continue;
1670 find_interesting_uses_op (data, op);
1674 /* Finds interesting uses of induction variables outside of loops
1675 on loop exit edge EXIT. */
1677 static void
1678 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1680 tree phi, def;
1682 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1684 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1685 find_interesting_uses_outer (data, def);
1689 /* Finds uses of the induction variables that are interesting. */
1691 static void
1692 find_interesting_uses (struct ivopts_data *data)
1694 basic_block bb;
1695 block_stmt_iterator bsi;
1696 tree phi;
1697 basic_block *body = get_loop_body (data->current_loop);
1698 unsigned i;
1699 struct version_info *info;
1700 edge e;
1702 if (dump_file && (dump_flags & TDF_DETAILS))
1703 fprintf (dump_file, "Uses:\n\n");
1705 for (i = 0; i < data->current_loop->num_nodes; i++)
1707 edge_iterator ei;
1708 bb = body[i];
1710 FOR_EACH_EDGE (e, ei, bb->succs)
1711 if (e->dest != EXIT_BLOCK_PTR
1712 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1713 find_interesting_uses_outside (data, e);
1715 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1716 find_interesting_uses_stmt (data, phi);
1717 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1718 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1721 if (dump_file && (dump_flags & TDF_DETAILS))
1723 bitmap_iterator bi;
1725 fprintf (dump_file, "\n");
1727 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1729 info = ver_info (data, i);
1730 if (info->inv_id)
1732 fprintf (dump_file, " ");
1733 print_generic_expr (dump_file, info->name, TDF_SLIM);
1734 fprintf (dump_file, " is invariant (%d)%s\n",
1735 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1739 fprintf (dump_file, "\n");
1742 free (body);
1745 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1746 is true, assume we are inside an address. */
1748 static tree
1749 strip_offset (tree expr, bool inside_addr, unsigned HOST_WIDE_INT *offset)
1751 tree op0 = NULL_TREE, op1 = NULL_TREE, step;
1752 enum tree_code code;
1753 tree type, orig_type = TREE_TYPE (expr);
1754 unsigned HOST_WIDE_INT off0, off1, st;
1755 tree orig_expr = expr;
1757 STRIP_NOPS (expr);
1758 type = TREE_TYPE (expr);
1759 code = TREE_CODE (expr);
1760 *offset = 0;
1762 switch (code)
1764 case INTEGER_CST:
1765 if (!cst_and_fits_in_hwi (expr)
1766 || zero_p (expr))
1767 return orig_expr;
1769 *offset = int_cst_value (expr);
1770 return build_int_cst_type (orig_type, 0);
1772 case PLUS_EXPR:
1773 case MINUS_EXPR:
1774 op0 = TREE_OPERAND (expr, 0);
1775 op1 = TREE_OPERAND (expr, 1);
1777 op0 = strip_offset (op0, false, &off0);
1778 op1 = strip_offset (op1, false, &off1);
1780 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1781 if (op0 == TREE_OPERAND (expr, 0)
1782 && op1 == TREE_OPERAND (expr, 1))
1783 return orig_expr;
1785 if (zero_p (op1))
1786 expr = op0;
1787 else if (zero_p (op0))
1789 if (code == PLUS_EXPR)
1790 expr = op1;
1791 else
1792 expr = build1 (NEGATE_EXPR, type, op1);
1794 else
1795 expr = build2 (code, type, op0, op1);
1797 return fold_convert (orig_type, expr);
1799 case ARRAY_REF:
1800 if (!inside_addr)
1801 return orig_expr;
1803 step = array_ref_element_size (expr);
1804 if (!cst_and_fits_in_hwi (step))
1805 break;
1807 st = int_cst_value (step);
1808 op1 = TREE_OPERAND (expr, 1);
1809 op1 = strip_offset (op1, false, &off1);
1810 *offset = off1 * st;
1811 break;
1813 case COMPONENT_REF:
1814 if (!inside_addr)
1815 return orig_expr;
1816 break;
1818 case ADDR_EXPR:
1819 inside_addr = true;
1820 break;
1822 default:
1823 return orig_expr;
1826 /* Default handling of expressions for that we want to recurse into
1827 the first operand. */
1828 op0 = TREE_OPERAND (expr, 0);
1829 op0 = strip_offset (op0, inside_addr, &off0);
1830 *offset += off0;
1832 if (op0 == TREE_OPERAND (expr, 0)
1833 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1834 return orig_expr;
1836 expr = copy_node (expr);
1837 TREE_OPERAND (expr, 0) = op0;
1838 if (op1)
1839 TREE_OPERAND (expr, 1) = op1;
1841 return fold_convert (orig_type, expr);
1844 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1845 position to POS. If USE is not NULL, the candidate is set as related to
1846 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1847 replacement of the final value of the iv by a direct computation. */
1849 static struct iv_cand *
1850 add_candidate_1 (struct ivopts_data *data,
1851 tree base, tree step, bool important, enum iv_position pos,
1852 struct iv_use *use, tree incremented_at)
1854 unsigned i;
1855 struct iv_cand *cand = NULL;
1856 tree type;
1858 if (base)
1860 type = TREE_TYPE (base);
1861 if (!TYPE_UNSIGNED (type))
1863 type = unsigned_type_for (type);
1864 base = fold_convert (type, base);
1865 if (step)
1866 step = fold_convert (type, step);
1870 for (i = 0; i < n_iv_cands (data); i++)
1872 cand = iv_cand (data, i);
1874 if (cand->pos != pos)
1875 continue;
1877 if (cand->incremented_at != incremented_at)
1878 continue;
1880 if (!cand->iv)
1882 if (!base && !step)
1883 break;
1885 continue;
1888 if (!base && !step)
1889 continue;
1891 if (!operand_equal_p (base, cand->iv->base, 0))
1892 continue;
1894 if (zero_p (cand->iv->step))
1896 if (zero_p (step))
1897 break;
1899 else
1901 if (step && operand_equal_p (step, cand->iv->step, 0))
1902 break;
1906 if (i == n_iv_cands (data))
1908 cand = xcalloc (1, sizeof (struct iv_cand));
1909 cand->id = i;
1911 if (!base && !step)
1912 cand->iv = NULL;
1913 else
1914 cand->iv = alloc_iv (base, step);
1916 cand->pos = pos;
1917 if (pos != IP_ORIGINAL && cand->iv)
1919 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1920 cand->var_after = cand->var_before;
1922 cand->important = important;
1923 cand->incremented_at = incremented_at;
1924 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1926 if (dump_file && (dump_flags & TDF_DETAILS))
1927 dump_cand (dump_file, cand);
1930 if (important && !cand->important)
1932 cand->important = true;
1933 if (dump_file && (dump_flags & TDF_DETAILS))
1934 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1937 if (use)
1939 bitmap_set_bit (use->related_cands, i);
1940 if (dump_file && (dump_flags & TDF_DETAILS))
1941 fprintf (dump_file, "Candidate %d is related to use %d\n",
1942 cand->id, use->id);
1945 return cand;
1948 /* Returns true if incrementing the induction variable at the end of the LOOP
1949 is allowed.
1951 The purpose is to avoid splitting latch edge with a biv increment, thus
1952 creating a jump, possibly confusing other optimization passes and leaving
1953 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
1954 is not available (so we do not have a better alternative), or if the latch
1955 edge is already nonempty. */
1957 static bool
1958 allow_ip_end_pos_p (struct loop *loop)
1960 if (!ip_normal_pos (loop))
1961 return true;
1963 if (!empty_block_p (ip_end_pos (loop)))
1964 return true;
1966 return false;
1969 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1970 position to POS. If USE is not NULL, the candidate is set as related to
1971 it. The candidate computation is scheduled on all available positions. */
1973 static void
1974 add_candidate (struct ivopts_data *data,
1975 tree base, tree step, bool important, struct iv_use *use)
1977 if (ip_normal_pos (data->current_loop))
1978 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
1979 if (ip_end_pos (data->current_loop)
1980 && allow_ip_end_pos_p (data->current_loop))
1981 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
1984 /* Add a standard "0 + 1 * iteration" iv candidate for a
1985 type with SIZE bits. */
1987 static void
1988 add_standard_iv_candidates_for_size (struct ivopts_data *data,
1989 unsigned int size)
1991 tree type = lang_hooks.types.type_for_size (size, true);
1992 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
1993 true, NULL);
1996 /* Adds standard iv candidates. */
1998 static void
1999 add_standard_iv_candidates (struct ivopts_data *data)
2001 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2003 /* The same for a double-integer type if it is still fast enough. */
2004 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2005 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2009 /* Adds candidates bases on the old induction variable IV. */
2011 static void
2012 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2014 tree phi, def;
2015 struct iv_cand *cand;
2017 add_candidate (data, iv->base, iv->step, true, NULL);
2019 /* The same, but with initial value zero. */
2020 add_candidate (data,
2021 build_int_cst (TREE_TYPE (iv->base), 0),
2022 iv->step, true, NULL);
2024 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2025 if (TREE_CODE (phi) == PHI_NODE)
2027 /* Additionally record the possibility of leaving the original iv
2028 untouched. */
2029 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2030 cand = add_candidate_1 (data,
2031 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2032 SSA_NAME_DEF_STMT (def));
2033 cand->var_before = iv->ssa_name;
2034 cand->var_after = def;
2038 /* Adds candidates based on the old induction variables. */
2040 static void
2041 add_old_ivs_candidates (struct ivopts_data *data)
2043 unsigned i;
2044 struct iv *iv;
2045 bitmap_iterator bi;
2047 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2049 iv = ver_info (data, i)->iv;
2050 if (iv && iv->biv_p && !zero_p (iv->step))
2051 add_old_iv_candidates (data, iv);
2055 /* Adds candidates based on the value of the induction variable IV and USE. */
2057 static void
2058 add_iv_value_candidates (struct ivopts_data *data,
2059 struct iv *iv, struct iv_use *use)
2061 add_candidate (data, iv->base, iv->step, false, use);
2063 /* The same, but with initial value zero. */
2064 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2065 iv->step, false, use);
2068 /* Adds candidates based on the address IV and USE. */
2070 static void
2071 add_address_candidates (struct ivopts_data *data,
2072 struct iv *iv, struct iv_use *use)
2074 tree base, abase;
2075 unsigned HOST_WIDE_INT offset;
2077 /* First, the trivial choices. */
2078 add_iv_value_candidates (data, iv, use);
2080 /* Second, try removing the COMPONENT_REFs. */
2081 if (TREE_CODE (iv->base) == ADDR_EXPR)
2083 base = TREE_OPERAND (iv->base, 0);
2084 while (TREE_CODE (base) == COMPONENT_REF
2085 || (TREE_CODE (base) == ARRAY_REF
2086 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
2087 base = TREE_OPERAND (base, 0);
2089 if (base != TREE_OPERAND (iv->base, 0))
2091 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
2092 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
2094 if (TREE_CODE (base) == INDIRECT_REF)
2095 base = TREE_OPERAND (base, 0);
2096 else
2097 base = build_addr (base);
2098 add_candidate (data, base, iv->step, false, use);
2102 /* Third, try removing the constant offset. */
2103 abase = iv->base;
2104 base = strip_offset (abase, false, &offset);
2105 if (offset)
2106 add_candidate (data, base, iv->step, false, use);
2109 /* Possibly adds pseudocandidate for replacing the final value of USE by
2110 a direct computation. */
2112 static void
2113 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2115 struct tree_niter_desc *niter;
2117 /* We must know where we exit the loop and how many times does it roll. */
2118 niter = niter_for_single_dom_exit (data);
2119 if (!niter
2120 || !zero_p (niter->may_be_zero))
2121 return;
2123 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2126 /* Adds candidates based on the uses. */
2128 static void
2129 add_derived_ivs_candidates (struct ivopts_data *data)
2131 unsigned i;
2133 for (i = 0; i < n_iv_uses (data); i++)
2135 struct iv_use *use = iv_use (data, i);
2137 if (!use)
2138 continue;
2140 switch (use->type)
2142 case USE_NONLINEAR_EXPR:
2143 case USE_COMPARE:
2144 /* Just add the ivs based on the value of the iv used here. */
2145 add_iv_value_candidates (data, use->iv, use);
2146 break;
2148 case USE_OUTER:
2149 add_iv_value_candidates (data, use->iv, use);
2151 /* Additionally, add the pseudocandidate for the possibility to
2152 replace the final value by a direct computation. */
2153 add_iv_outer_candidates (data, use);
2154 break;
2156 case USE_ADDRESS:
2157 add_address_candidates (data, use->iv, use);
2158 break;
2160 default:
2161 gcc_unreachable ();
2166 /* Record important candidates and add them to related_cands bitmaps
2167 if needed. */
2169 static void
2170 record_important_candidates (struct ivopts_data *data)
2172 unsigned i;
2173 struct iv_use *use;
2175 for (i = 0; i < n_iv_cands (data); i++)
2177 struct iv_cand *cand = iv_cand (data, i);
2179 if (cand->important)
2180 bitmap_set_bit (data->important_candidates, i);
2183 data->consider_all_candidates = (n_iv_cands (data)
2184 <= CONSIDER_ALL_CANDIDATES_BOUND);
2186 if (data->consider_all_candidates)
2188 /* We will not need "related_cands" bitmaps in this case,
2189 so release them to decrease peak memory consumption. */
2190 for (i = 0; i < n_iv_uses (data); i++)
2192 use = iv_use (data, i);
2193 BITMAP_FREE (use->related_cands);
2196 else
2198 /* Add important candidates to the related_cands bitmaps. */
2199 for (i = 0; i < n_iv_uses (data); i++)
2200 bitmap_ior_into (iv_use (data, i)->related_cands,
2201 data->important_candidates);
2205 /* Finds the candidates for the induction variables. */
2207 static void
2208 find_iv_candidates (struct ivopts_data *data)
2210 /* Add commonly used ivs. */
2211 add_standard_iv_candidates (data);
2213 /* Add old induction variables. */
2214 add_old_ivs_candidates (data);
2216 /* Add induction variables derived from uses. */
2217 add_derived_ivs_candidates (data);
2219 /* Record the important candidates. */
2220 record_important_candidates (data);
2223 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2224 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2225 we allocate a simple list to every use. */
2227 static void
2228 alloc_use_cost_map (struct ivopts_data *data)
2230 unsigned i, size, s, j;
2232 for (i = 0; i < n_iv_uses (data); i++)
2234 struct iv_use *use = iv_use (data, i);
2235 bitmap_iterator bi;
2237 if (data->consider_all_candidates)
2238 size = n_iv_cands (data);
2239 else
2241 s = 0;
2242 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2244 s++;
2247 /* Round up to the power of two, so that moduling by it is fast. */
2248 for (size = 1; size < s; size <<= 1)
2249 continue;
2252 use->n_map_members = size;
2253 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2257 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2258 on invariants DEPENDS_ON. */
2260 static void
2261 set_use_iv_cost (struct ivopts_data *data,
2262 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2263 bitmap depends_on)
2265 unsigned i, s;
2267 if (cost == INFTY)
2269 BITMAP_FREE (depends_on);
2270 return;
2273 if (data->consider_all_candidates)
2275 use->cost_map[cand->id].cand = cand;
2276 use->cost_map[cand->id].cost = cost;
2277 use->cost_map[cand->id].depends_on = depends_on;
2278 return;
2281 /* n_map_members is a power of two, so this computes modulo. */
2282 s = cand->id & (use->n_map_members - 1);
2283 for (i = s; i < use->n_map_members; i++)
2284 if (!use->cost_map[i].cand)
2285 goto found;
2286 for (i = 0; i < s; i++)
2287 if (!use->cost_map[i].cand)
2288 goto found;
2290 gcc_unreachable ();
2292 found:
2293 use->cost_map[i].cand = cand;
2294 use->cost_map[i].cost = cost;
2295 use->cost_map[i].depends_on = depends_on;
2298 /* Gets cost of (USE, CANDIDATE) pair. */
2300 static struct cost_pair *
2301 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2302 struct iv_cand *cand)
2304 unsigned i, s;
2305 struct cost_pair *ret;
2307 if (!cand)
2308 return NULL;
2310 if (data->consider_all_candidates)
2312 ret = use->cost_map + cand->id;
2313 if (!ret->cand)
2314 return NULL;
2316 return ret;
2319 /* n_map_members is a power of two, so this computes modulo. */
2320 s = cand->id & (use->n_map_members - 1);
2321 for (i = s; i < use->n_map_members; i++)
2322 if (use->cost_map[i].cand == cand)
2323 return use->cost_map + i;
2325 for (i = 0; i < s; i++)
2326 if (use->cost_map[i].cand == cand)
2327 return use->cost_map + i;
2329 return NULL;
2332 /* Returns estimate on cost of computing SEQ. */
2334 static unsigned
2335 seq_cost (rtx seq)
2337 unsigned cost = 0;
2338 rtx set;
2340 for (; seq; seq = NEXT_INSN (seq))
2342 set = single_set (seq);
2343 if (set)
2344 cost += rtx_cost (set, SET);
2345 else
2346 cost++;
2349 return cost;
2352 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2353 static rtx
2354 produce_memory_decl_rtl (tree obj, int *regno)
2356 rtx x;
2358 gcc_assert (obj);
2359 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2361 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2362 x = gen_rtx_SYMBOL_REF (Pmode, name);
2364 else
2365 x = gen_raw_REG (Pmode, (*regno)++);
2367 return gen_rtx_MEM (DECL_MODE (obj), x);
2370 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2371 walk_tree. DATA contains the actual fake register number. */
2373 static tree
2374 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2376 tree obj = NULL_TREE;
2377 rtx x = NULL_RTX;
2378 int *regno = data;
2380 switch (TREE_CODE (*expr_p))
2382 case ADDR_EXPR:
2383 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2384 handled_component_p (*expr_p);
2385 expr_p = &TREE_OPERAND (*expr_p, 0))
2386 continue;
2387 obj = *expr_p;
2388 if (DECL_P (obj))
2389 x = produce_memory_decl_rtl (obj, regno);
2390 break;
2392 case SSA_NAME:
2393 *ws = 0;
2394 obj = SSA_NAME_VAR (*expr_p);
2395 if (!DECL_RTL_SET_P (obj))
2396 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2397 break;
2399 case VAR_DECL:
2400 case PARM_DECL:
2401 case RESULT_DECL:
2402 *ws = 0;
2403 obj = *expr_p;
2405 if (DECL_RTL_SET_P (obj))
2406 break;
2408 if (DECL_MODE (obj) == BLKmode)
2409 x = produce_memory_decl_rtl (obj, regno);
2410 else
2411 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2413 break;
2415 default:
2416 break;
2419 if (x)
2421 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
2422 SET_DECL_RTL (obj, x);
2425 return NULL_TREE;
2428 /* Determines cost of the computation of EXPR. */
2430 static unsigned
2431 computation_cost (tree expr)
2433 rtx seq, rslt;
2434 tree type = TREE_TYPE (expr);
2435 unsigned cost;
2436 /* Avoid using hard regs in ways which may be unsupported. */
2437 int regno = LAST_VIRTUAL_REGISTER + 1;
2439 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2440 start_sequence ();
2441 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2442 seq = get_insns ();
2443 end_sequence ();
2445 cost = seq_cost (seq);
2446 if (GET_CODE (rslt) == MEM)
2447 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2449 return cost;
2452 /* Returns variable containing the value of candidate CAND at statement AT. */
2454 static tree
2455 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2457 if (stmt_after_increment (loop, cand, stmt))
2458 return cand->var_after;
2459 else
2460 return cand->var_before;
2463 /* Determines the expression by that USE is expressed from induction variable
2464 CAND at statement AT in LOOP. */
2466 static tree
2467 get_computation_at (struct loop *loop,
2468 struct iv_use *use, struct iv_cand *cand, tree at)
2470 tree ubase = use->iv->base;
2471 tree ustep = use->iv->step;
2472 tree cbase = cand->iv->base;
2473 tree cstep = cand->iv->step;
2474 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2475 tree uutype;
2476 tree expr, delta;
2477 tree ratio;
2478 unsigned HOST_WIDE_INT ustepi, cstepi;
2479 HOST_WIDE_INT ratioi;
2481 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2483 /* We do not have a precision to express the values of use. */
2484 return NULL_TREE;
2487 expr = var_at_stmt (loop, cand, at);
2489 if (TREE_TYPE (expr) != ctype)
2491 /* This may happen with the original ivs. */
2492 expr = fold_convert (ctype, expr);
2495 if (TYPE_UNSIGNED (utype))
2496 uutype = utype;
2497 else
2499 uutype = unsigned_type_for (utype);
2500 ubase = fold_convert (uutype, ubase);
2501 ustep = fold_convert (uutype, ustep);
2504 if (uutype != ctype)
2506 expr = fold_convert (uutype, expr);
2507 cbase = fold_convert (uutype, cbase);
2508 cstep = fold_convert (uutype, cstep);
2511 if (!cst_and_fits_in_hwi (cstep)
2512 || !cst_and_fits_in_hwi (ustep))
2513 return NULL_TREE;
2515 ustepi = int_cst_value (ustep);
2516 cstepi = int_cst_value (cstep);
2518 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2520 /* TODO maybe consider case when ustep divides cstep and the ratio is
2521 a power of 2 (so that the division is fast to execute)? We would
2522 need to be much more careful with overflows etc. then. */
2523 return NULL_TREE;
2526 /* We may need to shift the value if we are after the increment. */
2527 if (stmt_after_increment (loop, cand, at))
2528 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2530 /* use = ubase - ratio * cbase + ratio * var.
2532 In general case ubase + ratio * (var - cbase) could be better (one less
2533 multiplication), but often it is possible to eliminate redundant parts
2534 of computations from (ubase - ratio * cbase) term, and if it does not
2535 happen, fold is able to apply the distributive law to obtain this form
2536 anyway. */
2538 if (ratioi == 1)
2540 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2541 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2543 else if (ratioi == -1)
2545 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2546 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2548 else
2550 ratio = build_int_cst_type (uutype, ratioi);
2551 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2552 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2553 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2554 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2557 return fold_convert (utype, expr);
2560 /* Determines the expression by that USE is expressed from induction variable
2561 CAND in LOOP. */
2563 static tree
2564 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2566 return get_computation_at (loop, use, cand, use->stmt);
2569 /* Returns cost of addition in MODE. */
2571 static unsigned
2572 add_cost (enum machine_mode mode)
2574 static unsigned costs[NUM_MACHINE_MODES];
2575 rtx seq;
2576 unsigned cost;
2578 if (costs[mode])
2579 return costs[mode];
2581 start_sequence ();
2582 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2583 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2584 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2585 NULL_RTX);
2586 seq = get_insns ();
2587 end_sequence ();
2589 cost = seq_cost (seq);
2590 if (!cost)
2591 cost = 1;
2593 costs[mode] = cost;
2595 if (dump_file && (dump_flags & TDF_DETAILS))
2596 fprintf (dump_file, "Addition in %s costs %d\n",
2597 GET_MODE_NAME (mode), cost);
2598 return cost;
2601 /* Entry in a hashtable of already known costs for multiplication. */
2602 struct mbc_entry
2604 HOST_WIDE_INT cst; /* The constant to multiply by. */
2605 enum machine_mode mode; /* In mode. */
2606 unsigned cost; /* The cost. */
2609 /* Counts hash value for the ENTRY. */
2611 static hashval_t
2612 mbc_entry_hash (const void *entry)
2614 const struct mbc_entry *e = entry;
2616 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2619 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2621 static int
2622 mbc_entry_eq (const void *entry1, const void *entry2)
2624 const struct mbc_entry *e1 = entry1;
2625 const struct mbc_entry *e2 = entry2;
2627 return (e1->mode == e2->mode
2628 && e1->cst == e2->cst);
2631 /* Returns cost of multiplication by constant CST in MODE. */
2633 static unsigned
2634 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2636 static htab_t costs;
2637 struct mbc_entry **cached, act;
2638 rtx seq;
2639 unsigned cost;
2641 if (!costs)
2642 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2644 act.mode = mode;
2645 act.cst = cst;
2646 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2647 if (*cached)
2648 return (*cached)->cost;
2650 *cached = xmalloc (sizeof (struct mbc_entry));
2651 (*cached)->mode = mode;
2652 (*cached)->cst = cst;
2654 start_sequence ();
2655 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2656 NULL_RTX, 0);
2657 seq = get_insns ();
2658 end_sequence ();
2660 cost = seq_cost (seq);
2662 if (dump_file && (dump_flags & TDF_DETAILS))
2663 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2664 (int) cst, GET_MODE_NAME (mode), cost);
2666 (*cached)->cost = cost;
2668 return cost;
2671 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2672 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2673 variable is omitted. The created memory accesses MODE.
2675 TODO -- there must be some better way. This all is quite crude. */
2677 static unsigned
2678 get_address_cost (bool symbol_present, bool var_present,
2679 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2681 #define MAX_RATIO 128
2682 static sbitmap valid_mult;
2683 static HOST_WIDE_INT rat, off;
2684 static HOST_WIDE_INT min_offset, max_offset;
2685 static unsigned costs[2][2][2][2];
2686 unsigned cost, acost;
2687 rtx seq, addr, base;
2688 bool offset_p, ratio_p;
2689 rtx reg1;
2690 HOST_WIDE_INT s_offset;
2691 unsigned HOST_WIDE_INT mask;
2692 unsigned bits;
2694 if (!valid_mult)
2696 HOST_WIDE_INT i;
2698 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2700 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2701 for (i = 1; i <= 1 << 20; i <<= 1)
2703 XEXP (addr, 1) = GEN_INT (i);
2704 if (!memory_address_p (Pmode, addr))
2705 break;
2707 max_offset = i >> 1;
2708 off = max_offset;
2710 for (i = 1; i <= 1 << 20; i <<= 1)
2712 XEXP (addr, 1) = GEN_INT (-i);
2713 if (!memory_address_p (Pmode, addr))
2714 break;
2716 min_offset = -(i >> 1);
2718 if (dump_file && (dump_flags & TDF_DETAILS))
2720 fprintf (dump_file, "get_address_cost:\n");
2721 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2722 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2725 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2726 sbitmap_zero (valid_mult);
2727 rat = 1;
2728 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2729 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2731 XEXP (addr, 1) = GEN_INT (i);
2732 if (memory_address_p (Pmode, addr))
2734 SET_BIT (valid_mult, i + MAX_RATIO);
2735 rat = i;
2739 if (dump_file && (dump_flags & TDF_DETAILS))
2741 fprintf (dump_file, " allowed multipliers:");
2742 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2743 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2744 fprintf (dump_file, " %d", (int) i);
2745 fprintf (dump_file, "\n");
2746 fprintf (dump_file, "\n");
2750 bits = GET_MODE_BITSIZE (Pmode);
2751 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2752 offset &= mask;
2753 if ((offset >> (bits - 1) & 1))
2754 offset |= ~mask;
2755 s_offset = offset;
2757 cost = 0;
2758 offset_p = (s_offset != 0
2759 && min_offset <= s_offset && s_offset <= max_offset);
2760 ratio_p = (ratio != 1
2761 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2762 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2764 if (ratio != 1 && !ratio_p)
2765 cost += multiply_by_cost (ratio, Pmode);
2767 if (s_offset && !offset_p && !symbol_present)
2769 cost += add_cost (Pmode);
2770 var_present = true;
2773 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2774 if (!acost)
2776 acost = 0;
2778 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2779 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2780 if (ratio_p)
2781 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2783 if (var_present)
2784 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2786 if (symbol_present)
2788 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2789 if (offset_p)
2790 base = gen_rtx_fmt_e (CONST, Pmode,
2791 gen_rtx_fmt_ee (PLUS, Pmode,
2792 base,
2793 GEN_INT (off)));
2795 else if (offset_p)
2796 base = GEN_INT (off);
2797 else
2798 base = NULL_RTX;
2800 if (base)
2801 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2803 start_sequence ();
2804 addr = memory_address (Pmode, addr);
2805 seq = get_insns ();
2806 end_sequence ();
2808 acost = seq_cost (seq);
2809 acost += address_cost (addr, Pmode);
2811 if (!acost)
2812 acost = 1;
2813 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2816 return cost + acost;
2819 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2820 the bitmap to that we should store it. */
2822 static struct ivopts_data *fd_ivopts_data;
2823 static tree
2824 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2826 bitmap *depends_on = data;
2827 struct version_info *info;
2829 if (TREE_CODE (*expr_p) != SSA_NAME)
2830 return NULL_TREE;
2831 info = name_info (fd_ivopts_data, *expr_p);
2833 if (!info->inv_id || info->has_nonlin_use)
2834 return NULL_TREE;
2836 if (!*depends_on)
2837 *depends_on = BITMAP_ALLOC (NULL);
2838 bitmap_set_bit (*depends_on, info->inv_id);
2840 return NULL_TREE;
2843 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
2844 invariants the computation depends on. */
2846 static unsigned
2847 force_var_cost (struct ivopts_data *data,
2848 tree expr, bitmap *depends_on)
2850 static bool costs_initialized = false;
2851 static unsigned integer_cost;
2852 static unsigned symbol_cost;
2853 static unsigned address_cost;
2854 tree op0, op1;
2855 unsigned cost0, cost1, cost;
2856 enum machine_mode mode;
2858 if (!costs_initialized)
2860 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2861 rtx x = gen_rtx_MEM (DECL_MODE (var),
2862 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2863 tree addr;
2864 tree type = build_pointer_type (integer_type_node);
2866 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2867 2000));
2869 SET_DECL_RTL (var, x);
2870 TREE_STATIC (var) = 1;
2871 addr = build1 (ADDR_EXPR, type, var);
2872 symbol_cost = computation_cost (addr) + 1;
2874 address_cost
2875 = computation_cost (build2 (PLUS_EXPR, type,
2876 addr,
2877 build_int_cst_type (type, 2000))) + 1;
2878 if (dump_file && (dump_flags & TDF_DETAILS))
2880 fprintf (dump_file, "force_var_cost:\n");
2881 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2882 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2883 fprintf (dump_file, " address %d\n", (int) address_cost);
2884 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2885 fprintf (dump_file, "\n");
2888 costs_initialized = true;
2891 STRIP_NOPS (expr);
2893 if (depends_on)
2895 fd_ivopts_data = data;
2896 walk_tree (&expr, find_depends, depends_on, NULL);
2899 if (SSA_VAR_P (expr))
2900 return 0;
2902 if (TREE_INVARIANT (expr))
2904 if (TREE_CODE (expr) == INTEGER_CST)
2905 return integer_cost;
2907 if (TREE_CODE (expr) == ADDR_EXPR)
2909 tree obj = TREE_OPERAND (expr, 0);
2911 if (TREE_CODE (obj) == VAR_DECL
2912 || TREE_CODE (obj) == PARM_DECL
2913 || TREE_CODE (obj) == RESULT_DECL)
2914 return symbol_cost;
2917 return address_cost;
2920 switch (TREE_CODE (expr))
2922 case PLUS_EXPR:
2923 case MINUS_EXPR:
2924 case MULT_EXPR:
2925 op0 = TREE_OPERAND (expr, 0);
2926 op1 = TREE_OPERAND (expr, 1);
2927 STRIP_NOPS (op0);
2928 STRIP_NOPS (op1);
2930 if (is_gimple_val (op0))
2931 cost0 = 0;
2932 else
2933 cost0 = force_var_cost (data, op0, NULL);
2935 if (is_gimple_val (op1))
2936 cost1 = 0;
2937 else
2938 cost1 = force_var_cost (data, op1, NULL);
2940 break;
2942 default:
2943 /* Just an arbitrary value, FIXME. */
2944 return target_spill_cost;
2947 mode = TYPE_MODE (TREE_TYPE (expr));
2948 switch (TREE_CODE (expr))
2950 case PLUS_EXPR:
2951 case MINUS_EXPR:
2952 cost = add_cost (mode);
2953 break;
2955 case MULT_EXPR:
2956 if (cst_and_fits_in_hwi (op0))
2957 cost = multiply_by_cost (int_cst_value (op0), mode);
2958 else if (cst_and_fits_in_hwi (op1))
2959 cost = multiply_by_cost (int_cst_value (op1), mode);
2960 else
2961 return target_spill_cost;
2962 break;
2964 default:
2965 gcc_unreachable ();
2968 cost += cost0;
2969 cost += cost1;
2971 /* Bound the cost by target_spill_cost. The parts of complicated
2972 computations often are either loop invariant or at least can
2973 be shared between several iv uses, so letting this grow without
2974 limits would not give reasonable results. */
2975 return cost < target_spill_cost ? cost : target_spill_cost;
2978 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2979 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2980 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2981 invariants the computation depends on. */
2983 static unsigned
2984 split_address_cost (struct ivopts_data *data,
2985 tree addr, bool *symbol_present, bool *var_present,
2986 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2988 tree core;
2989 HOST_WIDE_INT bitsize;
2990 HOST_WIDE_INT bitpos;
2991 tree toffset;
2992 enum machine_mode mode;
2993 int unsignedp, volatilep;
2995 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
2996 &unsignedp, &volatilep, false);
2998 if (toffset != 0
2999 || bitpos % BITS_PER_UNIT != 0
3000 || TREE_CODE (core) != VAR_DECL)
3002 *symbol_present = false;
3003 *var_present = true;
3004 fd_ivopts_data = data;
3005 walk_tree (&addr, find_depends, depends_on, NULL);
3006 return target_spill_cost;
3009 *offset += bitpos / BITS_PER_UNIT;
3010 if (TREE_STATIC (core)
3011 || DECL_EXTERNAL (core))
3013 *symbol_present = true;
3014 *var_present = false;
3015 return 0;
3018 *symbol_present = false;
3019 *var_present = true;
3020 return 0;
3023 /* Estimates cost of expressing difference of addresses E1 - E2 as
3024 var + symbol + offset. The value of offset is added to OFFSET,
3025 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3026 part is missing. DEPENDS_ON is a set of the invariants the computation
3027 depends on. */
3029 static unsigned
3030 ptr_difference_cost (struct ivopts_data *data,
3031 tree e1, tree e2, bool *symbol_present, bool *var_present,
3032 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3034 HOST_WIDE_INT diff = 0;
3035 unsigned cost;
3037 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3039 if (ptr_difference_const (e1, e2, &diff))
3041 *offset += diff;
3042 *symbol_present = false;
3043 *var_present = false;
3044 return 0;
3047 if (e2 == integer_zero_node)
3048 return split_address_cost (data, TREE_OPERAND (e1, 0),
3049 symbol_present, var_present, offset, depends_on);
3051 *symbol_present = false;
3052 *var_present = true;
3054 cost = force_var_cost (data, e1, depends_on);
3055 cost += force_var_cost (data, e2, depends_on);
3056 cost += add_cost (Pmode);
3058 return cost;
3061 /* Estimates cost of expressing difference E1 - E2 as
3062 var + symbol + offset. The value of offset is added to OFFSET,
3063 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3064 part is missing. DEPENDS_ON is a set of the invariants the computation
3065 depends on. */
3067 static unsigned
3068 difference_cost (struct ivopts_data *data,
3069 tree e1, tree e2, bool *symbol_present, bool *var_present,
3070 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3072 unsigned cost;
3073 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3074 unsigned HOST_WIDE_INT off1, off2;
3076 e1 = strip_offset (e1, false, &off1);
3077 e2 = strip_offset (e2, false, &off2);
3078 *offset += off1 - off2;
3080 STRIP_NOPS (e1);
3081 STRIP_NOPS (e2);
3083 if (TREE_CODE (e1) == ADDR_EXPR)
3084 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3085 depends_on);
3086 *symbol_present = false;
3088 if (operand_equal_p (e1, e2, 0))
3090 *var_present = false;
3091 return 0;
3093 *var_present = true;
3094 if (zero_p (e2))
3095 return force_var_cost (data, e1, depends_on);
3097 if (zero_p (e1))
3099 cost = force_var_cost (data, e2, depends_on);
3100 cost += multiply_by_cost (-1, mode);
3102 return cost;
3105 cost = force_var_cost (data, e1, depends_on);
3106 cost += force_var_cost (data, e2, depends_on);
3107 cost += add_cost (mode);
3109 return cost;
3112 /* Determines the cost of the computation by that USE is expressed
3113 from induction variable CAND. If ADDRESS_P is true, we just need
3114 to create an address from it, otherwise we want to get it into
3115 register. A set of invariants we depend on is stored in
3116 DEPENDS_ON. AT is the statement at that the value is computed. */
3118 static unsigned
3119 get_computation_cost_at (struct ivopts_data *data,
3120 struct iv_use *use, struct iv_cand *cand,
3121 bool address_p, bitmap *depends_on, tree at)
3123 tree ubase = use->iv->base, ustep = use->iv->step;
3124 tree cbase, cstep;
3125 tree utype = TREE_TYPE (ubase), ctype;
3126 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3127 HOST_WIDE_INT ratio, aratio;
3128 bool var_present, symbol_present;
3129 unsigned cost = 0, n_sums;
3131 *depends_on = NULL;
3133 /* Only consider real candidates. */
3134 if (!cand->iv)
3135 return INFTY;
3137 cbase = cand->iv->base;
3138 cstep = cand->iv->step;
3139 ctype = TREE_TYPE (cbase);
3141 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3143 /* We do not have a precision to express the values of use. */
3144 return INFTY;
3147 if (address_p)
3149 /* Do not try to express address of an object with computation based
3150 on address of a different object. This may cause problems in rtl
3151 level alias analysis (that does not expect this to be happening,
3152 as this is illegal in C), and would be unlikely to be useful
3153 anyway. */
3154 if (use->iv->base_object
3155 && cand->iv->base_object
3156 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3157 return INFTY;
3160 if (!cst_and_fits_in_hwi (ustep)
3161 || !cst_and_fits_in_hwi (cstep))
3162 return INFTY;
3164 if (TREE_CODE (ubase) == INTEGER_CST
3165 && !cst_and_fits_in_hwi (ubase))
3166 goto fallback;
3168 if (TREE_CODE (cbase) == INTEGER_CST
3169 && !cst_and_fits_in_hwi (cbase))
3170 goto fallback;
3172 ustepi = int_cst_value (ustep);
3173 cstepi = int_cst_value (cstep);
3175 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3177 /* TODO -- add direct handling of this case. */
3178 goto fallback;
3181 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3182 return INFTY;
3184 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3185 or ratio == 1, it is better to handle this like
3187 ubase - ratio * cbase + ratio * var
3189 (also holds in the case ratio == -1, TODO. */
3191 if (TREE_CODE (cbase) == INTEGER_CST)
3193 offset = - ratio * int_cst_value (cbase);
3194 cost += difference_cost (data,
3195 ubase, integer_zero_node,
3196 &symbol_present, &var_present, &offset,
3197 depends_on);
3199 else if (ratio == 1)
3201 cost += difference_cost (data,
3202 ubase, cbase,
3203 &symbol_present, &var_present, &offset,
3204 depends_on);
3206 else
3208 cost += force_var_cost (data, cbase, depends_on);
3209 cost += add_cost (TYPE_MODE (ctype));
3210 cost += difference_cost (data,
3211 ubase, integer_zero_node,
3212 &symbol_present, &var_present, &offset,
3213 depends_on);
3216 /* If we are after the increment, the value of the candidate is higher by
3217 one iteration. */
3218 if (stmt_after_increment (data->current_loop, cand, at))
3219 offset -= ratio * cstepi;
3221 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3222 (symbol/var/const parts may be omitted). If we are looking for an address,
3223 find the cost of addressing this. */
3224 if (address_p)
3225 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3227 /* Otherwise estimate the costs for computing the expression. */
3228 aratio = ratio > 0 ? ratio : -ratio;
3229 if (!symbol_present && !var_present && !offset)
3231 if (ratio != 1)
3232 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3234 return cost;
3237 if (aratio != 1)
3238 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3240 n_sums = 1;
3241 if (var_present
3242 /* Symbol + offset should be compile-time computable. */
3243 && (symbol_present || offset))
3244 n_sums++;
3246 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3248 fallback:
3250 /* Just get the expression, expand it and measure the cost. */
3251 tree comp = get_computation_at (data->current_loop, use, cand, at);
3253 if (!comp)
3254 return INFTY;
3256 if (address_p)
3257 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3259 return computation_cost (comp);
3263 /* Determines the cost of the computation by that USE is expressed
3264 from induction variable CAND. If ADDRESS_P is true, we just need
3265 to create an address from it, otherwise we want to get it into
3266 register. A set of invariants we depend on is stored in
3267 DEPENDS_ON. */
3269 static unsigned
3270 get_computation_cost (struct ivopts_data *data,
3271 struct iv_use *use, struct iv_cand *cand,
3272 bool address_p, bitmap *depends_on)
3274 return get_computation_cost_at (data,
3275 use, cand, address_p, depends_on, use->stmt);
3278 /* Determines cost of basing replacement of USE on CAND in a generic
3279 expression. */
3281 static bool
3282 determine_use_iv_cost_generic (struct ivopts_data *data,
3283 struct iv_use *use, struct iv_cand *cand)
3285 bitmap depends_on;
3286 unsigned cost;
3288 /* The simple case first -- if we need to express value of the preserved
3289 original biv, the cost is 0. This also prevents us from counting the
3290 cost of increment twice -- once at this use and once in the cost of
3291 the candidate. */
3292 if (cand->pos == IP_ORIGINAL
3293 && cand->incremented_at == use->stmt)
3295 set_use_iv_cost (data, use, cand, 0, NULL);
3296 return true;
3299 cost = get_computation_cost (data, use, cand, false, &depends_on);
3300 set_use_iv_cost (data, use, cand, cost, depends_on);
3302 return cost != INFTY;
3305 /* Determines cost of basing replacement of USE on CAND in an address. */
3307 static bool
3308 determine_use_iv_cost_address (struct ivopts_data *data,
3309 struct iv_use *use, struct iv_cand *cand)
3311 bitmap depends_on;
3312 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3314 set_use_iv_cost (data, use, cand, cost, depends_on);
3316 return cost != INFTY;
3319 /* Computes value of induction variable IV in iteration NITER. */
3321 static tree
3322 iv_value (struct iv *iv, tree niter)
3324 tree val;
3325 tree type = TREE_TYPE (iv->base);
3327 niter = fold_convert (type, niter);
3328 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3330 return fold (build2 (PLUS_EXPR, type, iv->base, val));
3333 /* Computes value of candidate CAND at position AT in iteration NITER. */
3335 static tree
3336 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3338 tree val = iv_value (cand->iv, niter);
3339 tree type = TREE_TYPE (cand->iv->base);
3341 if (stmt_after_increment (loop, cand, at))
3342 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3344 return val;
3347 /* Returns period of induction variable iv. */
3349 static tree
3350 iv_period (struct iv *iv)
3352 tree step = iv->step, period, type;
3353 tree pow2div;
3355 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3357 /* Period of the iv is gcd (step, type range). Since type range is power
3358 of two, it suffices to determine the maximum power of two that divides
3359 step. */
3360 pow2div = num_ending_zeros (step);
3361 type = unsigned_type_for (TREE_TYPE (step));
3363 period = build_low_bits_mask (type,
3364 (TYPE_PRECISION (type)
3365 - tree_low_cst (pow2div, 1)));
3367 return period;
3370 /* Check whether it is possible to express the condition in USE by comparison
3371 of candidate CAND. If so, store the comparison code to COMPARE and the
3372 value compared with to BOUND. */
3374 static bool
3375 may_eliminate_iv (struct ivopts_data *data,
3376 struct iv_use *use, struct iv_cand *cand,
3377 enum tree_code *compare, tree *bound)
3379 basic_block ex_bb;
3380 edge exit;
3381 struct tree_niter_desc *niter;
3382 tree nit, nit_type;
3383 tree wider_type, period, per_type;
3384 struct loop *loop = data->current_loop;
3386 /* For now works only for exits that dominate the loop latch. TODO -- extend
3387 for other conditions inside loop body. */
3388 ex_bb = bb_for_stmt (use->stmt);
3389 if (use->stmt != last_stmt (ex_bb)
3390 || TREE_CODE (use->stmt) != COND_EXPR)
3391 return false;
3392 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3393 return false;
3395 exit = EDGE_SUCC (ex_bb, 0);
3396 if (flow_bb_inside_loop_p (loop, exit->dest))
3397 exit = EDGE_SUCC (ex_bb, 1);
3398 if (flow_bb_inside_loop_p (loop, exit->dest))
3399 return false;
3401 niter = niter_for_exit (data, exit);
3402 if (!niter
3403 || !zero_p (niter->may_be_zero))
3404 return false;
3406 nit = niter->niter;
3407 nit_type = TREE_TYPE (nit);
3409 /* Determine whether we may use the variable to test whether niter iterations
3410 elapsed. This is the case iff the period of the induction variable is
3411 greater than the number of iterations. */
3412 period = iv_period (cand->iv);
3413 if (!period)
3414 return false;
3415 per_type = TREE_TYPE (period);
3417 wider_type = TREE_TYPE (period);
3418 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3419 wider_type = per_type;
3420 else
3421 wider_type = nit_type;
3423 if (!integer_nonzerop (fold (build2 (GE_EXPR, boolean_type_node,
3424 fold_convert (wider_type, period),
3425 fold_convert (wider_type, nit)))))
3426 return false;
3428 if (exit->flags & EDGE_TRUE_VALUE)
3429 *compare = EQ_EXPR;
3430 else
3431 *compare = NE_EXPR;
3433 *bound = cand_value_at (loop, cand, use->stmt, nit);
3434 return true;
3437 /* Determines cost of basing replacement of USE on CAND in a condition. */
3439 static bool
3440 determine_use_iv_cost_condition (struct ivopts_data *data,
3441 struct iv_use *use, struct iv_cand *cand)
3443 tree bound;
3444 enum tree_code compare;
3446 /* Only consider real candidates. */
3447 if (!cand->iv)
3449 set_use_iv_cost (data, use, cand, INFTY, NULL);
3450 return false;
3453 if (may_eliminate_iv (data, use, cand, &compare, &bound))
3455 bitmap depends_on = NULL;
3456 unsigned cost = force_var_cost (data, bound, &depends_on);
3458 set_use_iv_cost (data, use, cand, cost, depends_on);
3459 return cost != INFTY;
3462 /* The induction variable elimination failed; just express the original
3463 giv. If it is compared with an invariant, note that we cannot get
3464 rid of it. */
3465 if (TREE_CODE (*use->op_p) == SSA_NAME)
3466 record_invariant (data, *use->op_p, true);
3467 else
3469 record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
3470 record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
3473 return determine_use_iv_cost_generic (data, use, cand);
3476 /* Checks whether it is possible to replace the final value of USE by
3477 a direct computation. If so, the formula is stored to *VALUE. */
3479 static bool
3480 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
3481 tree *value)
3483 struct loop *loop = data->current_loop;
3484 edge exit;
3485 struct tree_niter_desc *niter;
3487 exit = single_dom_exit (loop);
3488 if (!exit)
3489 return false;
3491 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3492 bb_for_stmt (use->stmt)));
3494 niter = niter_for_single_dom_exit (data);
3495 if (!niter
3496 || !zero_p (niter->may_be_zero))
3497 return false;
3499 *value = iv_value (use->iv, niter->niter);
3501 return true;
3504 /* Determines cost of replacing final value of USE using CAND. */
3506 static bool
3507 determine_use_iv_cost_outer (struct ivopts_data *data,
3508 struct iv_use *use, struct iv_cand *cand)
3510 bitmap depends_on;
3511 unsigned cost;
3512 edge exit;
3513 tree value;
3514 struct loop *loop = data->current_loop;
3516 /* The simple case first -- if we need to express value of the preserved
3517 original biv, the cost is 0. This also prevents us from counting the
3518 cost of increment twice -- once at this use and once in the cost of
3519 the candidate. */
3520 if (cand->pos == IP_ORIGINAL
3521 && cand->incremented_at == use->stmt)
3523 set_use_iv_cost (data, use, cand, 0, NULL);
3524 return true;
3527 if (!cand->iv)
3529 if (!may_replace_final_value (data, use, &value))
3531 set_use_iv_cost (data, use, cand, INFTY, NULL);
3532 return false;
3535 depends_on = NULL;
3536 cost = force_var_cost (data, value, &depends_on);
3538 cost /= AVG_LOOP_NITER (loop);
3540 set_use_iv_cost (data, use, cand, cost, depends_on);
3541 return cost != INFTY;
3544 exit = single_dom_exit (loop);
3545 if (exit)
3547 /* If there is just a single exit, we may use value of the candidate
3548 after we take it to determine the value of use. */
3549 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3550 last_stmt (exit->src));
3551 if (cost != INFTY)
3552 cost /= AVG_LOOP_NITER (loop);
3554 else
3556 /* Otherwise we just need to compute the iv. */
3557 cost = get_computation_cost (data, use, cand, false, &depends_on);
3560 set_use_iv_cost (data, use, cand, cost, depends_on);
3562 return cost != INFTY;
3565 /* Determines cost of basing replacement of USE on CAND. Returns false
3566 if USE cannot be based on CAND. */
3568 static bool
3569 determine_use_iv_cost (struct ivopts_data *data,
3570 struct iv_use *use, struct iv_cand *cand)
3572 switch (use->type)
3574 case USE_NONLINEAR_EXPR:
3575 return determine_use_iv_cost_generic (data, use, cand);
3577 case USE_OUTER:
3578 return determine_use_iv_cost_outer (data, use, cand);
3580 case USE_ADDRESS:
3581 return determine_use_iv_cost_address (data, use, cand);
3583 case USE_COMPARE:
3584 return determine_use_iv_cost_condition (data, use, cand);
3586 default:
3587 gcc_unreachable ();
3591 /* Determines costs of basing the use of the iv on an iv candidate. */
3593 static void
3594 determine_use_iv_costs (struct ivopts_data *data)
3596 unsigned i, j;
3597 struct iv_use *use;
3598 struct iv_cand *cand;
3599 bitmap to_clear = BITMAP_ALLOC (NULL);
3601 alloc_use_cost_map (data);
3603 for (i = 0; i < n_iv_uses (data); i++)
3605 use = iv_use (data, i);
3607 if (data->consider_all_candidates)
3609 for (j = 0; j < n_iv_cands (data); j++)
3611 cand = iv_cand (data, j);
3612 determine_use_iv_cost (data, use, cand);
3615 else
3617 bitmap_iterator bi;
3619 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3621 cand = iv_cand (data, j);
3622 if (!determine_use_iv_cost (data, use, cand))
3623 bitmap_set_bit (to_clear, j);
3626 /* Remove the candidates for that the cost is infinite from
3627 the list of related candidates. */
3628 bitmap_and_compl_into (use->related_cands, to_clear);
3629 bitmap_clear (to_clear);
3633 BITMAP_FREE (to_clear);
3635 if (dump_file && (dump_flags & TDF_DETAILS))
3637 fprintf (dump_file, "Use-candidate costs:\n");
3639 for (i = 0; i < n_iv_uses (data); i++)
3641 use = iv_use (data, i);
3643 fprintf (dump_file, "Use %d:\n", i);
3644 fprintf (dump_file, " cand\tcost\tdepends on\n");
3645 for (j = 0; j < use->n_map_members; j++)
3647 if (!use->cost_map[j].cand
3648 || use->cost_map[j].cost == INFTY)
3649 continue;
3651 fprintf (dump_file, " %d\t%d\t",
3652 use->cost_map[j].cand->id,
3653 use->cost_map[j].cost);
3654 if (use->cost_map[j].depends_on)
3655 bitmap_print (dump_file,
3656 use->cost_map[j].depends_on, "","");
3657 fprintf (dump_file, "\n");
3660 fprintf (dump_file, "\n");
3662 fprintf (dump_file, "\n");
3666 /* Determines cost of the candidate CAND. */
3668 static void
3669 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3671 unsigned cost_base, cost_step;
3672 tree base;
3674 if (!cand->iv)
3676 cand->cost = 0;
3677 return;
3680 /* There are two costs associated with the candidate -- its increment
3681 and its initialization. The second is almost negligible for any loop
3682 that rolls enough, so we take it just very little into account. */
3684 base = cand->iv->base;
3685 cost_base = force_var_cost (data, base, NULL);
3686 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3688 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3690 /* Prefer the original iv unless we may gain something by replacing it;
3691 this is not really relevant for artificial ivs created by other
3692 passes. */
3693 if (cand->pos == IP_ORIGINAL
3694 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3695 cand->cost--;
3697 /* Prefer not to insert statements into latch unless there are some
3698 already (so that we do not create unnecessary jumps). */
3699 if (cand->pos == IP_END
3700 && empty_block_p (ip_end_pos (data->current_loop)))
3701 cand->cost++;
3704 /* Determines costs of computation of the candidates. */
3706 static void
3707 determine_iv_costs (struct ivopts_data *data)
3709 unsigned i;
3711 if (dump_file && (dump_flags & TDF_DETAILS))
3713 fprintf (dump_file, "Candidate costs:\n");
3714 fprintf (dump_file, " cand\tcost\n");
3717 for (i = 0; i < n_iv_cands (data); i++)
3719 struct iv_cand *cand = iv_cand (data, i);
3721 determine_iv_cost (data, cand);
3723 if (dump_file && (dump_flags & TDF_DETAILS))
3724 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3727 if (dump_file && (dump_flags & TDF_DETAILS))
3728 fprintf (dump_file, "\n");
3731 /* Calculates cost for having SIZE induction variables. */
3733 static unsigned
3734 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3736 return global_cost_for_size (size,
3737 loop_data (data->current_loop)->regs_used,
3738 n_iv_uses (data));
3741 /* For each size of the induction variable set determine the penalty. */
3743 static void
3744 determine_set_costs (struct ivopts_data *data)
3746 unsigned j, n;
3747 tree phi, op;
3748 struct loop *loop = data->current_loop;
3749 bitmap_iterator bi;
3751 /* We use the following model (definitely improvable, especially the
3752 cost function -- TODO):
3754 We estimate the number of registers available (using MD data), name it A.
3756 We estimate the number of registers used by the loop, name it U. This
3757 number is obtained as the number of loop phi nodes (not counting virtual
3758 registers and bivs) + the number of variables from outside of the loop.
3760 We set a reserve R (free regs that are used for temporary computations,
3761 etc.). For now the reserve is a constant 3.
3763 Let I be the number of induction variables.
3765 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3766 make a lot of ivs without a reason).
3767 -- if A - R < U + I <= A, the cost is I * PRES_COST
3768 -- if U + I > A, the cost is I * PRES_COST and
3769 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3771 if (dump_file && (dump_flags & TDF_DETAILS))
3773 fprintf (dump_file, "Global costs:\n");
3774 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3775 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3776 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3777 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3780 n = 0;
3781 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3783 op = PHI_RESULT (phi);
3785 if (!is_gimple_reg (op))
3786 continue;
3788 if (get_iv (data, op))
3789 continue;
3791 n++;
3794 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3796 struct version_info *info = ver_info (data, j);
3798 if (info->inv_id && info->has_nonlin_use)
3799 n++;
3802 loop_data (loop)->regs_used = n;
3803 if (dump_file && (dump_flags & TDF_DETAILS))
3804 fprintf (dump_file, " regs_used %d\n", n);
3806 if (dump_file && (dump_flags & TDF_DETAILS))
3808 fprintf (dump_file, " cost for size:\n");
3809 fprintf (dump_file, " ivs\tcost\n");
3810 for (j = 0; j <= 2 * target_avail_regs; j++)
3811 fprintf (dump_file, " %d\t%d\n", j,
3812 ivopts_global_cost_for_size (data, j));
3813 fprintf (dump_file, "\n");
3817 /* Returns true if A is a cheaper cost pair than B. */
3819 static bool
3820 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3822 if (!a)
3823 return false;
3825 if (!b)
3826 return true;
3828 if (a->cost < b->cost)
3829 return true;
3831 if (a->cost > b->cost)
3832 return false;
3834 /* In case the costs are the same, prefer the cheaper candidate. */
3835 if (a->cand->cost < b->cand->cost)
3836 return true;
3838 return false;
3841 /* Computes the cost field of IVS structure. */
3843 static void
3844 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
3846 unsigned cost = 0;
3848 cost += ivs->cand_use_cost;
3849 cost += ivs->cand_cost;
3850 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
3852 ivs->cost = cost;
3855 /* Set USE not to be expressed by any candidate in IVS. */
3857 static void
3858 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
3859 struct iv_use *use)
3861 unsigned uid = use->id, cid, iid;
3862 bitmap deps;
3863 struct cost_pair *cp;
3864 bitmap_iterator bi;
3866 cp = ivs->cand_for_use[uid];
3867 if (!cp)
3868 return;
3869 cid = cp->cand->id;
3871 ivs->bad_uses++;
3872 ivs->cand_for_use[uid] = NULL;
3873 ivs->n_cand_uses[cid]--;
3875 if (ivs->n_cand_uses[cid] == 0)
3877 bitmap_clear_bit (ivs->cands, cid);
3878 /* Do not count the pseudocandidates. */
3879 if (cp->cand->iv)
3880 ivs->n_regs--;
3881 ivs->n_cands--;
3882 ivs->cand_cost -= cp->cand->cost;
3885 ivs->cand_use_cost -= cp->cost;
3887 deps = cp->depends_on;
3889 if (deps)
3891 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3893 ivs->n_invariant_uses[iid]--;
3894 if (ivs->n_invariant_uses[iid] == 0)
3895 ivs->n_regs--;
3899 iv_ca_recount_cost (data, ivs);
3902 /* Set cost pair for USE in set IVS to CP. */
3904 static void
3905 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
3906 struct iv_use *use, struct cost_pair *cp)
3908 unsigned uid = use->id, cid, iid;
3909 bitmap deps;
3910 bitmap_iterator bi;
3912 if (ivs->cand_for_use[uid] == cp)
3913 return;
3915 if (ivs->cand_for_use[uid])
3916 iv_ca_set_no_cp (data, ivs, use);
3918 if (cp)
3920 cid = cp->cand->id;
3922 ivs->bad_uses--;
3923 ivs->cand_for_use[uid] = cp;
3924 ivs->n_cand_uses[cid]++;
3925 if (ivs->n_cand_uses[cid] == 1)
3927 bitmap_set_bit (ivs->cands, cid);
3928 /* Do not count the pseudocandidates. */
3929 if (cp->cand->iv)
3930 ivs->n_regs++;
3931 ivs->n_cands++;
3932 ivs->cand_cost += cp->cand->cost;
3935 ivs->cand_use_cost += cp->cost;
3937 deps = cp->depends_on;
3939 if (deps)
3941 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3943 ivs->n_invariant_uses[iid]++;
3944 if (ivs->n_invariant_uses[iid] == 1)
3945 ivs->n_regs++;
3949 iv_ca_recount_cost (data, ivs);
3953 /* Extend set IVS by expressing USE by some of the candidates in it
3954 if possible. */
3956 static void
3957 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
3958 struct iv_use *use)
3960 struct cost_pair *best_cp = NULL, *cp;
3961 bitmap_iterator bi;
3962 unsigned i;
3964 gcc_assert (ivs->upto >= use->id);
3966 if (ivs->upto == use->id)
3968 ivs->upto++;
3969 ivs->bad_uses++;
3972 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
3974 cp = get_use_iv_cost (data, use, iv_cand (data, i));
3976 if (cheaper_cost_pair (cp, best_cp))
3977 best_cp = cp;
3980 iv_ca_set_cp (data, ivs, use, best_cp);
3983 /* Get cost for assignment IVS. */
3985 static unsigned
3986 iv_ca_cost (struct iv_ca *ivs)
3988 return (ivs->bad_uses ? INFTY : ivs->cost);
3991 /* Returns true if all dependences of CP are among invariants in IVS. */
3993 static bool
3994 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
3996 unsigned i;
3997 bitmap_iterator bi;
3999 if (!cp->depends_on)
4000 return true;
4002 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4004 if (ivs->n_invariant_uses[i] == 0)
4005 return false;
4008 return true;
4011 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4012 it before NEXT_CHANGE. */
4014 static struct iv_ca_delta *
4015 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4016 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4018 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4020 change->use = use;
4021 change->old_cp = old_cp;
4022 change->new_cp = new_cp;
4023 change->next_change = next_change;
4025 return change;
4028 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4029 are rewritten. */
4031 static struct iv_ca_delta *
4032 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4034 struct iv_ca_delta *last;
4036 if (!l2)
4037 return l1;
4039 if (!l1)
4040 return l2;
4042 for (last = l1; last->next_change; last = last->next_change)
4043 continue;
4044 last->next_change = l2;
4046 return l1;
4049 /* Returns candidate by that USE is expressed in IVS. */
4051 static struct cost_pair *
4052 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4054 return ivs->cand_for_use[use->id];
4057 /* Reverse the list of changes DELTA, forming the inverse to it. */
4059 static struct iv_ca_delta *
4060 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4062 struct iv_ca_delta *act, *next, *prev = NULL;
4063 struct cost_pair *tmp;
4065 for (act = delta; act; act = next)
4067 next = act->next_change;
4068 act->next_change = prev;
4069 prev = act;
4071 tmp = act->old_cp;
4072 act->old_cp = act->new_cp;
4073 act->new_cp = tmp;
4076 return prev;
4079 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4080 reverted instead. */
4082 static void
4083 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4084 struct iv_ca_delta *delta, bool forward)
4086 struct cost_pair *from, *to;
4087 struct iv_ca_delta *act;
4089 if (!forward)
4090 delta = iv_ca_delta_reverse (delta);
4092 for (act = delta; act; act = act->next_change)
4094 from = act->old_cp;
4095 to = act->new_cp;
4096 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4097 iv_ca_set_cp (data, ivs, act->use, to);
4100 if (!forward)
4101 iv_ca_delta_reverse (delta);
4104 /* Returns true if CAND is used in IVS. */
4106 static bool
4107 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4109 return ivs->n_cand_uses[cand->id] > 0;
4112 /* Returns number of induction variable candidates in the set IVS. */
4114 static unsigned
4115 iv_ca_n_cands (struct iv_ca *ivs)
4117 return ivs->n_cands;
4120 /* Free the list of changes DELTA. */
4122 static void
4123 iv_ca_delta_free (struct iv_ca_delta **delta)
4125 struct iv_ca_delta *act, *next;
4127 for (act = *delta; act; act = next)
4129 next = act->next_change;
4130 free (act);
4133 *delta = NULL;
4136 /* Allocates new iv candidates assignment. */
4138 static struct iv_ca *
4139 iv_ca_new (struct ivopts_data *data)
4141 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4143 nw->upto = 0;
4144 nw->bad_uses = 0;
4145 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4146 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4147 nw->cands = BITMAP_ALLOC (NULL);
4148 nw->n_cands = 0;
4149 nw->n_regs = 0;
4150 nw->cand_use_cost = 0;
4151 nw->cand_cost = 0;
4152 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4153 nw->cost = 0;
4155 return nw;
4158 /* Free memory occupied by the set IVS. */
4160 static void
4161 iv_ca_free (struct iv_ca **ivs)
4163 free ((*ivs)->cand_for_use);
4164 free ((*ivs)->n_cand_uses);
4165 BITMAP_FREE ((*ivs)->cands);
4166 free ((*ivs)->n_invariant_uses);
4167 free (*ivs);
4168 *ivs = NULL;
4171 /* Dumps IVS to FILE. */
4173 static void
4174 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4176 const char *pref = " invariants ";
4177 unsigned i;
4179 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4180 bitmap_print (file, ivs->cands, " candidates ","\n");
4182 for (i = 1; i <= data->max_inv_id; i++)
4183 if (ivs->n_invariant_uses[i])
4185 fprintf (file, "%s%d", pref, i);
4186 pref = ", ";
4188 fprintf (file, "\n");
4191 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4192 new set, and store differences in DELTA. Number of induction variables
4193 in the new set is stored to N_IVS. */
4195 static unsigned
4196 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4197 struct iv_cand *cand, struct iv_ca_delta **delta,
4198 unsigned *n_ivs)
4200 unsigned i, cost;
4201 struct iv_use *use;
4202 struct cost_pair *old_cp, *new_cp;
4204 *delta = NULL;
4205 for (i = 0; i < ivs->upto; i++)
4207 use = iv_use (data, i);
4208 old_cp = iv_ca_cand_for_use (ivs, use);
4210 if (old_cp
4211 && old_cp->cand == cand)
4212 continue;
4214 new_cp = get_use_iv_cost (data, use, cand);
4215 if (!new_cp)
4216 continue;
4218 if (!iv_ca_has_deps (ivs, new_cp))
4219 continue;
4221 if (!cheaper_cost_pair (new_cp, old_cp))
4222 continue;
4224 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4227 iv_ca_delta_commit (data, ivs, *delta, true);
4228 cost = iv_ca_cost (ivs);
4229 if (n_ivs)
4230 *n_ivs = iv_ca_n_cands (ivs);
4231 iv_ca_delta_commit (data, ivs, *delta, false);
4233 return cost;
4236 /* Try narrowing set IVS by removing CAND. Return the cost of
4237 the new set and store the differences in DELTA. */
4239 static unsigned
4240 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4241 struct iv_cand *cand, struct iv_ca_delta **delta)
4243 unsigned i, ci;
4244 struct iv_use *use;
4245 struct cost_pair *old_cp, *new_cp, *cp;
4246 bitmap_iterator bi;
4247 struct iv_cand *cnd;
4248 unsigned cost;
4250 *delta = NULL;
4251 for (i = 0; i < n_iv_uses (data); i++)
4253 use = iv_use (data, i);
4255 old_cp = iv_ca_cand_for_use (ivs, use);
4256 if (old_cp->cand != cand)
4257 continue;
4259 new_cp = NULL;
4261 if (data->consider_all_candidates)
4263 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4265 if (ci == cand->id)
4266 continue;
4268 cnd = iv_cand (data, ci);
4270 cp = get_use_iv_cost (data, use, cnd);
4271 if (!cp)
4272 continue;
4273 if (!iv_ca_has_deps (ivs, cp))
4274 continue;
4276 if (!cheaper_cost_pair (cp, new_cp))
4277 continue;
4279 new_cp = cp;
4282 else
4284 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4286 if (ci == cand->id)
4287 continue;
4289 cnd = iv_cand (data, ci);
4291 cp = get_use_iv_cost (data, use, cnd);
4292 if (!cp)
4293 continue;
4294 if (!iv_ca_has_deps (ivs, cp))
4295 continue;
4297 if (!cheaper_cost_pair (cp, new_cp))
4298 continue;
4300 new_cp = cp;
4304 if (!new_cp)
4306 iv_ca_delta_free (delta);
4307 return INFTY;
4310 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4313 iv_ca_delta_commit (data, ivs, *delta, true);
4314 cost = iv_ca_cost (ivs);
4315 iv_ca_delta_commit (data, ivs, *delta, false);
4317 return cost;
4320 /* Try optimizing the set of candidates IVS by removing candidates different
4321 from to EXCEPT_CAND from it. Return cost of the new set, and store
4322 differences in DELTA. */
4324 static unsigned
4325 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4326 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4328 bitmap_iterator bi;
4329 struct iv_ca_delta *act_delta, *best_delta;
4330 unsigned i, best_cost, acost;
4331 struct iv_cand *cand;
4333 best_delta = NULL;
4334 best_cost = iv_ca_cost (ivs);
4336 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4338 cand = iv_cand (data, i);
4340 if (cand == except_cand)
4341 continue;
4343 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4345 if (acost < best_cost)
4347 best_cost = acost;
4348 iv_ca_delta_free (&best_delta);
4349 best_delta = act_delta;
4351 else
4352 iv_ca_delta_free (&act_delta);
4355 if (!best_delta)
4357 *delta = NULL;
4358 return best_cost;
4361 /* Recurse to possibly remove other unnecessary ivs. */
4362 iv_ca_delta_commit (data, ivs, best_delta, true);
4363 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4364 iv_ca_delta_commit (data, ivs, best_delta, false);
4365 *delta = iv_ca_delta_join (best_delta, *delta);
4366 return best_cost;
4369 /* Tries to extend the sets IVS in the best possible way in order
4370 to express the USE. */
4372 static bool
4373 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4374 struct iv_use *use)
4376 unsigned best_cost, act_cost;
4377 unsigned i;
4378 bitmap_iterator bi;
4379 struct iv_cand *cand;
4380 struct iv_ca_delta *best_delta = NULL, *act_delta;
4381 struct cost_pair *cp;
4383 iv_ca_add_use (data, ivs, use);
4384 best_cost = iv_ca_cost (ivs);
4386 cp = iv_ca_cand_for_use (ivs, use);
4387 if (cp)
4389 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4390 iv_ca_set_no_cp (data, ivs, use);
4393 /* First try important candidates. Only if it fails, try the specific ones.
4394 Rationale -- in loops with many variables the best choice often is to use
4395 just one generic biv. If we added here many ivs specific to the uses,
4396 the optimization algorithm later would be likely to get stuck in a local
4397 minimum, thus causing us to create too many ivs. The approach from
4398 few ivs to more seems more likely to be successful -- starting from few
4399 ivs, replacing an expensive use by a specific iv should always be a
4400 win. */
4401 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4403 cand = iv_cand (data, i);
4405 if (iv_ca_cand_used_p (ivs, cand))
4406 continue;
4408 cp = get_use_iv_cost (data, use, cand);
4409 if (!cp)
4410 continue;
4412 iv_ca_set_cp (data, ivs, use, cp);
4413 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4414 iv_ca_set_no_cp (data, ivs, use);
4415 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4417 if (act_cost < best_cost)
4419 best_cost = act_cost;
4421 iv_ca_delta_free (&best_delta);
4422 best_delta = act_delta;
4424 else
4425 iv_ca_delta_free (&act_delta);
4428 if (best_cost == INFTY)
4430 for (i = 0; i < use->n_map_members; i++)
4432 cp = use->cost_map + i;
4433 cand = cp->cand;
4434 if (!cand)
4435 continue;
4437 /* Already tried this. */
4438 if (cand->important)
4439 continue;
4441 if (iv_ca_cand_used_p (ivs, cand))
4442 continue;
4444 act_delta = NULL;
4445 iv_ca_set_cp (data, ivs, use, cp);
4446 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4447 iv_ca_set_no_cp (data, ivs, use);
4448 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4449 cp, act_delta);
4451 if (act_cost < best_cost)
4453 best_cost = act_cost;
4455 if (best_delta)
4456 iv_ca_delta_free (&best_delta);
4457 best_delta = act_delta;
4459 else
4460 iv_ca_delta_free (&act_delta);
4464 iv_ca_delta_commit (data, ivs, best_delta, true);
4465 iv_ca_delta_free (&best_delta);
4467 return (best_cost != INFTY);
4470 /* Finds an initial assignment of candidates to uses. */
4472 static struct iv_ca *
4473 get_initial_solution (struct ivopts_data *data)
4475 struct iv_ca *ivs = iv_ca_new (data);
4476 unsigned i;
4478 for (i = 0; i < n_iv_uses (data); i++)
4479 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4481 iv_ca_free (&ivs);
4482 return NULL;
4485 return ivs;
4488 /* Tries to improve set of induction variables IVS. */
4490 static bool
4491 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4493 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4494 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4495 struct iv_cand *cand;
4497 /* Try extending the set of induction variables by one. */
4498 for (i = 0; i < n_iv_cands (data); i++)
4500 cand = iv_cand (data, i);
4502 if (iv_ca_cand_used_p (ivs, cand))
4503 continue;
4505 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4506 if (!act_delta)
4507 continue;
4509 /* If we successfully added the candidate and the set is small enough,
4510 try optimizing it by removing other candidates. */
4511 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4513 iv_ca_delta_commit (data, ivs, act_delta, true);
4514 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4515 iv_ca_delta_commit (data, ivs, act_delta, false);
4516 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4519 if (acost < best_cost)
4521 best_cost = acost;
4522 iv_ca_delta_free (&best_delta);
4523 best_delta = act_delta;
4525 else
4526 iv_ca_delta_free (&act_delta);
4529 if (!best_delta)
4531 /* Try removing the candidates from the set instead. */
4532 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4534 /* Nothing more we can do. */
4535 if (!best_delta)
4536 return false;
4539 iv_ca_delta_commit (data, ivs, best_delta, true);
4540 gcc_assert (best_cost == iv_ca_cost (ivs));
4541 iv_ca_delta_free (&best_delta);
4542 return true;
4545 /* Attempts to find the optimal set of induction variables. We do simple
4546 greedy heuristic -- we try to replace at most one candidate in the selected
4547 solution and remove the unused ivs while this improves the cost. */
4549 static struct iv_ca *
4550 find_optimal_iv_set (struct ivopts_data *data)
4552 unsigned i;
4553 struct iv_ca *set;
4554 struct iv_use *use;
4556 /* Get the initial solution. */
4557 set = get_initial_solution (data);
4558 if (!set)
4560 if (dump_file && (dump_flags & TDF_DETAILS))
4561 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4562 return NULL;
4565 if (dump_file && (dump_flags & TDF_DETAILS))
4567 fprintf (dump_file, "Initial set of candidates:\n");
4568 iv_ca_dump (data, dump_file, set);
4571 while (try_improve_iv_set (data, set))
4573 if (dump_file && (dump_flags & TDF_DETAILS))
4575 fprintf (dump_file, "Improved to:\n");
4576 iv_ca_dump (data, dump_file, set);
4580 if (dump_file && (dump_flags & TDF_DETAILS))
4581 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
4583 for (i = 0; i < n_iv_uses (data); i++)
4585 use = iv_use (data, i);
4586 use->selected = iv_ca_cand_for_use (set, use)->cand;
4589 return set;
4592 /* Creates a new induction variable corresponding to CAND. */
4594 static void
4595 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4597 block_stmt_iterator incr_pos;
4598 tree base;
4599 bool after = false;
4601 if (!cand->iv)
4602 return;
4604 switch (cand->pos)
4606 case IP_NORMAL:
4607 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4608 break;
4610 case IP_END:
4611 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4612 after = true;
4613 break;
4615 case IP_ORIGINAL:
4616 /* Mark that the iv is preserved. */
4617 name_info (data, cand->var_before)->preserve_biv = true;
4618 name_info (data, cand->var_after)->preserve_biv = true;
4620 /* Rewrite the increment so that it uses var_before directly. */
4621 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4623 return;
4626 gimple_add_tmp_var (cand->var_before);
4627 add_referenced_tmp_var (cand->var_before);
4629 base = unshare_expr (cand->iv->base);
4631 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
4632 &incr_pos, after, &cand->var_before, &cand->var_after);
4635 /* Creates new induction variables described in SET. */
4637 static void
4638 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4640 unsigned i;
4641 struct iv_cand *cand;
4642 bitmap_iterator bi;
4644 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4646 cand = iv_cand (data, i);
4647 create_new_iv (data, cand);
4651 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4652 is true, remove also the ssa name defined by the statement. */
4654 static void
4655 remove_statement (tree stmt, bool including_defined_name)
4657 if (TREE_CODE (stmt) == PHI_NODE)
4659 if (!including_defined_name)
4661 /* Prevent the ssa name defined by the statement from being removed. */
4662 SET_PHI_RESULT (stmt, NULL);
4664 remove_phi_node (stmt, NULL_TREE);
4666 else
4668 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4670 bsi_remove (&bsi);
4674 /* Rewrites USE (definition of iv used in a nonlinear expression)
4675 using candidate CAND. */
4677 static void
4678 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4679 struct iv_use *use, struct iv_cand *cand)
4681 tree comp;
4682 tree op, stmts, tgt, ass;
4683 block_stmt_iterator bsi, pbsi;
4685 /* An important special case -- if we are asked to express value of
4686 the original iv by itself, just exit; there is no need to
4687 introduce a new computation (that might also need casting the
4688 variable to unsigned and back). */
4689 if (cand->pos == IP_ORIGINAL
4690 && TREE_CODE (use->stmt) == MODIFY_EXPR
4691 && TREE_OPERAND (use->stmt, 0) == cand->var_after)
4693 op = TREE_OPERAND (use->stmt, 1);
4695 /* Be a bit careful. In case variable is expressed in some
4696 complicated way, rewrite it so that we may get rid of this
4697 complicated expression. */
4698 if ((TREE_CODE (op) == PLUS_EXPR
4699 || TREE_CODE (op) == MINUS_EXPR)
4700 && TREE_OPERAND (op, 0) == cand->var_before
4701 && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
4702 return;
4705 comp = unshare_expr (get_computation (data->current_loop,
4706 use, cand));
4707 switch (TREE_CODE (use->stmt))
4709 case PHI_NODE:
4710 tgt = PHI_RESULT (use->stmt);
4712 /* If we should keep the biv, do not replace it. */
4713 if (name_info (data, tgt)->preserve_biv)
4714 return;
4716 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4717 while (!bsi_end_p (pbsi)
4718 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4720 bsi = pbsi;
4721 bsi_next (&pbsi);
4723 break;
4725 case MODIFY_EXPR:
4726 tgt = TREE_OPERAND (use->stmt, 0);
4727 bsi = bsi_for_stmt (use->stmt);
4728 break;
4730 default:
4731 gcc_unreachable ();
4734 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4736 if (TREE_CODE (use->stmt) == PHI_NODE)
4738 if (stmts)
4739 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4740 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
4741 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4742 remove_statement (use->stmt, false);
4743 SSA_NAME_DEF_STMT (tgt) = ass;
4745 else
4747 if (stmts)
4748 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4749 TREE_OPERAND (use->stmt, 1) = op;
4753 /* Replaces ssa name in index IDX by its basic variable. Callback for
4754 for_each_index. */
4756 static bool
4757 idx_remove_ssa_names (tree base, tree *idx,
4758 void *data ATTRIBUTE_UNUSED)
4760 tree *op;
4762 if (TREE_CODE (*idx) == SSA_NAME)
4763 *idx = SSA_NAME_VAR (*idx);
4765 if (TREE_CODE (base) == ARRAY_REF)
4767 op = &TREE_OPERAND (base, 2);
4768 if (*op
4769 && TREE_CODE (*op) == SSA_NAME)
4770 *op = SSA_NAME_VAR (*op);
4771 op = &TREE_OPERAND (base, 3);
4772 if (*op
4773 && TREE_CODE (*op) == SSA_NAME)
4774 *op = SSA_NAME_VAR (*op);
4777 return true;
4780 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4782 static tree
4783 unshare_and_remove_ssa_names (tree ref)
4785 ref = unshare_expr (ref);
4786 for_each_index (&ref, idx_remove_ssa_names, NULL);
4788 return ref;
4791 /* Rewrites base of memory access OP with expression WITH in statement
4792 pointed to by BSI. */
4794 static void
4795 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
4797 tree bvar, var, new_var, new_name, copy, name;
4798 tree orig;
4800 var = bvar = get_base_address (*op);
4802 if (!var || TREE_CODE (with) != SSA_NAME)
4803 goto do_rewrite;
4805 gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
4806 gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
4807 if (TREE_CODE (var) == INDIRECT_REF)
4808 var = TREE_OPERAND (var, 0);
4809 if (TREE_CODE (var) == SSA_NAME)
4811 name = var;
4812 var = SSA_NAME_VAR (var);
4814 else if (DECL_P (var))
4815 name = NULL_TREE;
4816 else
4817 goto do_rewrite;
4819 if (var_ann (var)->type_mem_tag)
4820 var = var_ann (var)->type_mem_tag;
4822 /* We need to add a memory tag for the variable. But we do not want
4823 to add it to the temporary used for the computations, since this leads
4824 to problems in redundancy elimination when there are common parts
4825 in two computations referring to the different arrays. So we copy
4826 the variable to a new temporary. */
4827 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
4828 if (name)
4829 new_name = duplicate_ssa_name (name, copy);
4830 else
4832 new_var = create_tmp_var (TREE_TYPE (with), "ruatmp");
4833 add_referenced_tmp_var (new_var);
4834 var_ann (new_var)->type_mem_tag = var;
4835 new_name = make_ssa_name (new_var, copy);
4837 TREE_OPERAND (copy, 0) = new_name;
4838 update_stmt (copy);
4839 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
4840 with = new_name;
4842 do_rewrite:
4844 orig = NULL_TREE;
4845 gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
4846 gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
4848 if (TREE_CODE (*op) == INDIRECT_REF)
4849 orig = REF_ORIGINAL (*op);
4850 if (!orig)
4851 orig = unshare_and_remove_ssa_names (*op);
4853 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
4855 /* Record the original reference, for purposes of alias analysis. */
4856 REF_ORIGINAL (*op) = orig;
4859 /* Rewrites USE (address that is an iv) using candidate CAND. */
4861 static void
4862 rewrite_use_address (struct ivopts_data *data,
4863 struct iv_use *use, struct iv_cand *cand)
4865 tree comp = unshare_expr (get_computation (data->current_loop,
4866 use, cand));
4867 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4868 tree stmts;
4869 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
4871 if (stmts)
4872 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4874 rewrite_address_base (&bsi, use->op_p, op);
4877 /* Rewrites USE (the condition such that one of the arguments is an iv) using
4878 candidate CAND. */
4880 static void
4881 rewrite_use_compare (struct ivopts_data *data,
4882 struct iv_use *use, struct iv_cand *cand)
4884 tree comp;
4885 tree *op_p, cond, op, stmts, bound;
4886 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4887 enum tree_code compare;
4889 if (may_eliminate_iv (data, use, cand, &compare, &bound))
4891 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
4892 tree var_type = TREE_TYPE (var);
4894 bound = fold_convert (var_type, bound);
4895 op = force_gimple_operand (unshare_expr (bound), &stmts,
4896 true, NULL_TREE);
4898 if (stmts)
4899 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4901 *use->op_p = build2 (compare, boolean_type_node, var, op);
4902 update_stmt (use->stmt);
4903 return;
4906 /* The induction variable elimination failed; just express the original
4907 giv. */
4908 comp = unshare_expr (get_computation (data->current_loop, use, cand));
4910 cond = *use->op_p;
4911 op_p = &TREE_OPERAND (cond, 0);
4912 if (TREE_CODE (*op_p) != SSA_NAME
4913 || zero_p (get_iv (data, *op_p)->step))
4914 op_p = &TREE_OPERAND (cond, 1);
4916 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
4917 if (stmts)
4918 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4920 *op_p = op;
4923 /* Ensure that operand *OP_P may be used at the end of EXIT without
4924 violating loop closed ssa form. */
4926 static void
4927 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
4929 basic_block def_bb;
4930 struct loop *def_loop;
4931 tree phi, use;
4933 use = USE_FROM_PTR (op_p);
4934 if (TREE_CODE (use) != SSA_NAME)
4935 return;
4937 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
4938 if (!def_bb)
4939 return;
4941 def_loop = def_bb->loop_father;
4942 if (flow_bb_inside_loop_p (def_loop, exit->dest))
4943 return;
4945 /* Try finding a phi node that copies the value out of the loop. */
4946 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
4947 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
4948 break;
4950 if (!phi)
4952 /* Create such a phi node. */
4953 tree new_name = duplicate_ssa_name (use, NULL);
4955 phi = create_phi_node (new_name, exit->dest);
4956 SSA_NAME_DEF_STMT (new_name) = phi;
4957 add_phi_arg (phi, use, exit);
4960 SET_USE (op_p, PHI_RESULT (phi));
4963 /* Ensure that operands of STMT may be used at the end of EXIT without
4964 violating loop closed ssa form. */
4966 static void
4967 protect_loop_closed_ssa_form (edge exit, tree stmt)
4969 use_optype uses;
4970 vuse_optype vuses;
4971 v_may_def_optype v_may_defs;
4972 unsigned i;
4974 get_stmt_operands (stmt);
4976 uses = STMT_USE_OPS (stmt);
4977 for (i = 0; i < NUM_USES (uses); i++)
4978 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4980 vuses = STMT_VUSE_OPS (stmt);
4981 for (i = 0; i < NUM_VUSES (vuses); i++)
4982 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
4984 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4985 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
4986 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
4989 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
4990 so that they are emitted on the correct place, and so that the loop closed
4991 ssa form is preserved. */
4993 static void
4994 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
4996 tree_stmt_iterator tsi;
4997 block_stmt_iterator bsi;
4998 tree phi, stmt, def, next;
5000 if (!single_pred_p (exit->dest))
5001 split_loop_exit_edge (exit);
5003 /* Ensure there is label in exit->dest, so that we can
5004 insert after it. */
5005 tree_block_label (exit->dest);
5006 bsi = bsi_after_labels (exit->dest);
5008 if (TREE_CODE (stmts) == STATEMENT_LIST)
5010 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5012 bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
5013 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5016 else
5018 bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
5019 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5022 if (!op)
5023 return;
5025 for (phi = phi_nodes (exit->dest); phi; phi = next)
5027 next = PHI_CHAIN (phi);
5029 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5031 def = PHI_RESULT (phi);
5032 remove_statement (phi, false);
5033 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5034 def, op);
5035 SSA_NAME_DEF_STMT (def) = stmt;
5036 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5041 /* Rewrites the final value of USE (that is only needed outside of the loop)
5042 using candidate CAND. */
5044 static void
5045 rewrite_use_outer (struct ivopts_data *data,
5046 struct iv_use *use, struct iv_cand *cand)
5048 edge exit;
5049 tree value, op, stmts, tgt;
5050 tree phi;
5052 switch (TREE_CODE (use->stmt))
5054 case PHI_NODE:
5055 tgt = PHI_RESULT (use->stmt);
5056 break;
5057 case MODIFY_EXPR:
5058 tgt = TREE_OPERAND (use->stmt, 0);
5059 break;
5060 default:
5061 gcc_unreachable ();
5064 exit = single_dom_exit (data->current_loop);
5066 if (exit)
5068 if (!cand->iv)
5070 bool ok = may_replace_final_value (data, use, &value);
5071 gcc_assert (ok);
5073 else
5074 value = get_computation_at (data->current_loop,
5075 use, cand, last_stmt (exit->src));
5077 value = unshare_expr (value);
5078 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5080 /* If we will preserve the iv anyway and we would need to perform
5081 some computation to replace the final value, do nothing. */
5082 if (stmts && name_info (data, tgt)->preserve_biv)
5083 return;
5085 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5087 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5089 if (USE_FROM_PTR (use_p) == tgt)
5090 SET_USE (use_p, op);
5093 if (stmts)
5094 compute_phi_arg_on_exit (exit, stmts, op);
5096 /* Enable removal of the statement. We cannot remove it directly,
5097 since we may still need the aliasing information attached to the
5098 ssa name defined by it. */
5099 name_info (data, tgt)->iv->have_use_for = false;
5100 return;
5103 /* If the variable is going to be preserved anyway, there is nothing to
5104 do. */
5105 if (name_info (data, tgt)->preserve_biv)
5106 return;
5108 /* Otherwise we just need to compute the iv. */
5109 rewrite_use_nonlinear_expr (data, use, cand);
5112 /* Rewrites USE using candidate CAND. */
5114 static void
5115 rewrite_use (struct ivopts_data *data,
5116 struct iv_use *use, struct iv_cand *cand)
5118 switch (use->type)
5120 case USE_NONLINEAR_EXPR:
5121 rewrite_use_nonlinear_expr (data, use, cand);
5122 break;
5124 case USE_OUTER:
5125 rewrite_use_outer (data, use, cand);
5126 break;
5128 case USE_ADDRESS:
5129 rewrite_use_address (data, use, cand);
5130 break;
5132 case USE_COMPARE:
5133 rewrite_use_compare (data, use, cand);
5134 break;
5136 default:
5137 gcc_unreachable ();
5139 update_stmt (use->stmt);
5142 /* Rewrite the uses using the selected induction variables. */
5144 static void
5145 rewrite_uses (struct ivopts_data *data)
5147 unsigned i;
5148 struct iv_cand *cand;
5149 struct iv_use *use;
5151 for (i = 0; i < n_iv_uses (data); i++)
5153 use = iv_use (data, i);
5154 cand = use->selected;
5155 gcc_assert (cand);
5157 rewrite_use (data, use, cand);
5161 /* Removes the ivs that are not used after rewriting. */
5163 static void
5164 remove_unused_ivs (struct ivopts_data *data)
5166 unsigned j;
5167 bitmap_iterator bi;
5169 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5171 struct version_info *info;
5173 info = ver_info (data, j);
5174 if (info->iv
5175 && !zero_p (info->iv->step)
5176 && !info->inv_id
5177 && !info->iv->have_use_for
5178 && !info->preserve_biv)
5179 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5183 /* Frees data allocated by the optimization of a single loop. */
5185 static void
5186 free_loop_data (struct ivopts_data *data)
5188 unsigned i, j;
5189 bitmap_iterator bi;
5191 htab_empty (data->niters);
5193 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5195 struct version_info *info;
5197 info = ver_info (data, i);
5198 if (info->iv)
5199 free (info->iv);
5200 info->iv = NULL;
5201 info->has_nonlin_use = false;
5202 info->preserve_biv = false;
5203 info->inv_id = 0;
5205 bitmap_clear (data->relevant);
5206 bitmap_clear (data->important_candidates);
5208 for (i = 0; i < n_iv_uses (data); i++)
5210 struct iv_use *use = iv_use (data, i);
5212 free (use->iv);
5213 BITMAP_FREE (use->related_cands);
5214 for (j = 0; j < use->n_map_members; j++)
5215 if (use->cost_map[j].depends_on)
5216 BITMAP_FREE (use->cost_map[j].depends_on);
5217 free (use->cost_map);
5218 free (use);
5220 VARRAY_POP_ALL (data->iv_uses);
5222 for (i = 0; i < n_iv_cands (data); i++)
5224 struct iv_cand *cand = iv_cand (data, i);
5226 if (cand->iv)
5227 free (cand->iv);
5228 free (cand);
5230 VARRAY_POP_ALL (data->iv_candidates);
5232 if (data->version_info_size < num_ssa_names)
5234 data->version_info_size = 2 * num_ssa_names;
5235 free (data->version_info);
5236 data->version_info = xcalloc (data->version_info_size,
5237 sizeof (struct version_info));
5240 data->max_inv_id = 0;
5242 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
5244 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
5246 SET_DECL_RTL (obj, NULL_RTX);
5248 VARRAY_POP_ALL (decl_rtl_to_reset);
5251 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5252 loop tree. */
5254 static void
5255 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5257 unsigned i;
5259 for (i = 1; i < loops->num; i++)
5260 if (loops->parray[i])
5262 free (loops->parray[i]->aux);
5263 loops->parray[i]->aux = NULL;
5266 free_loop_data (data);
5267 free (data->version_info);
5268 BITMAP_FREE (data->relevant);
5269 BITMAP_FREE (data->important_candidates);
5270 htab_delete (data->niters);
5272 VARRAY_FREE (decl_rtl_to_reset);
5273 VARRAY_FREE (data->iv_uses);
5274 VARRAY_FREE (data->iv_candidates);
5277 /* Optimizes the LOOP. Returns true if anything changed. */
5279 static bool
5280 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5282 bool changed = false;
5283 struct iv_ca *iv_ca;
5284 edge exit;
5286 data->current_loop = loop;
5288 if (dump_file && (dump_flags & TDF_DETAILS))
5290 fprintf (dump_file, "Processing loop %d\n", loop->num);
5292 exit = single_dom_exit (loop);
5293 if (exit)
5295 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5296 exit->src->index, exit->dest->index);
5297 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5298 fprintf (dump_file, "\n");
5301 fprintf (dump_file, "\n");
5304 /* For each ssa name determines whether it behaves as an induction variable
5305 in some loop. */
5306 if (!find_induction_variables (data))
5307 goto finish;
5309 /* Finds interesting uses (item 1). */
5310 find_interesting_uses (data);
5311 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5312 goto finish;
5314 /* Finds candidates for the induction variables (item 2). */
5315 find_iv_candidates (data);
5317 /* Calculates the costs (item 3, part 1). */
5318 determine_use_iv_costs (data);
5319 determine_iv_costs (data);
5320 determine_set_costs (data);
5322 /* Find the optimal set of induction variables (item 3, part 2). */
5323 iv_ca = find_optimal_iv_set (data);
5324 if (!iv_ca)
5325 goto finish;
5326 changed = true;
5328 /* Create the new induction variables (item 4, part 1). */
5329 create_new_ivs (data, iv_ca);
5330 iv_ca_free (&iv_ca);
5332 /* Rewrite the uses (item 4, part 2). */
5333 rewrite_uses (data);
5335 /* Remove the ivs that are unused after rewriting. */
5336 remove_unused_ivs (data);
5338 /* We have changed the structure of induction variables; it might happen
5339 that definitions in the scev database refer to some of them that were
5340 eliminated. */
5341 scev_reset ();
5343 finish:
5344 free_loop_data (data);
5346 return changed;
5349 /* Main entry point. Optimizes induction variables in LOOPS. */
5351 void
5352 tree_ssa_iv_optimize (struct loops *loops)
5354 struct loop *loop;
5355 struct ivopts_data data;
5357 tree_ssa_iv_optimize_init (loops, &data);
5359 /* Optimize the loops starting with the innermost ones. */
5360 loop = loops->tree_root;
5361 while (loop->inner)
5362 loop = loop->inner;
5364 #ifdef ENABLE_CHECKING
5365 verify_loop_closed_ssa ();
5366 verify_stmts ();
5367 #endif
5369 /* Scan the loops, inner ones first. */
5370 while (loop != loops->tree_root)
5372 if (dump_file && (dump_flags & TDF_DETAILS))
5373 flow_loop_dump (loop, dump_file, NULL, 1);
5375 tree_ssa_iv_optimize_loop (&data, loop);
5377 if (loop->next)
5379 loop = loop->next;
5380 while (loop->inner)
5381 loop = loop->inner;
5383 else
5384 loop = loop->outer;
5387 #ifdef ENABLE_CHECKING
5388 verify_loop_closed_ssa ();
5389 verify_stmts ();
5390 #endif
5392 tree_ssa_iv_optimize_finalize (loops, &data);