[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r.t. a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variable's value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible; it might give better performance and might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
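/* As an illustrative example of the optimizations described above (added
   for exposition, not part of the original comment): in a loop like

     for (i = 0; i < n; i++)
       a[i] = 0;

   the address computation for a[i] (base address plus i * sizeof (a[0]))
   can be strength reduced to a separate pointer induction variable that
   starts at &a[0] and is incremented by sizeof (a[0]) each iteration,
   and the exit test on i can be rewritten in terms of that pointer,
   allowing i itself to be eliminated.  */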
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
91 #include "langhooks.h"
93 /* The infinite cost. */
94 #define INFTY 10000000
96 /* The expected number of loop iterations. TODO -- use profiling instead of
97 this. */
98 #define AVG_LOOP_NITER(LOOP) 5
101 /* Representation of the induction variable. */
102 struct iv
104 tree base; /* Initial value of the iv. */
105 tree base_object; /* A memory object to which the induction variable points. */
106 tree step; /* Step of the iv (constant only). */
107 tree ssa_name; /* The ssa name with the value. */
108 bool biv_p; /* Is it a biv? */
109 bool have_use_for; /* Do we already have a use for it? */
110 unsigned use_id; /* The identifier in the use if it is the case. */
113 /* Per-ssa version information (induction variable descriptions, etc.). */
114 struct version_info
116 tree name; /* The ssa name. */
117 struct iv *iv; /* Induction variable description. */
118 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id; /* Id of an invariant. */
121 bool preserve_biv; /* For the original biv, whether to preserve it. */
124 /* Information attached to loop. */
125 struct loop_data
127 unsigned regs_used; /* Number of registers used. */
130 /* Types of uses. */
131 enum use_type
133 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
134 USE_OUTER, /* The induction variable is used outside the loop. */
135 USE_ADDRESS, /* Use in an address. */
136 USE_COMPARE /* Use is a compare. */
139 /* The candidate - cost pair. */
140 struct cost_pair
142 struct iv_cand *cand; /* The candidate. */
143 unsigned cost; /* The cost. */
144 bitmap depends_on; /* The list of invariants that have to be
145 preserved. */
148 /* Use. */
149 struct iv_use
151 unsigned id; /* The id of the use. */
152 enum use_type type; /* Type of the use. */
153 struct iv *iv; /* The induction variable it is based on. */
154 tree stmt; /* Statement in which it occurs. */
155 tree *op_p; /* The place where it occurs. */
156 bitmap related_cands; /* The set of "related" iv candidates, plus the common
157 important ones. */
159 unsigned n_map_members; /* Number of candidates in the cost_map list. */
160 struct cost_pair *cost_map;
161 /* The costs w.r.t. the iv candidates. */
163 struct iv_cand *selected;
164 /* The selected candidate. */
167 /* The position where the iv is computed. */
168 enum iv_position
170 IP_NORMAL, /* At the end, just before the exit condition. */
171 IP_END, /* At the end of the latch block. */
172 IP_ORIGINAL /* The original biv. */
175 /* The induction variable candidate. */
176 struct iv_cand
178 unsigned id; /* The number of the candidate. */
179 bool important; /* Whether this is an "important" candidate, i.e. such
180 that it should be considered by all uses. */
181 enum iv_position pos; /* Where it is computed. */
182 tree incremented_at; /* For original biv, the statement where it is
183 incremented. */
184 tree var_before; /* The variable used for it before increment. */
185 tree var_after; /* The variable used for it after increment. */
186 struct iv *iv; /* The value of the candidate. NULL for
187 "pseudocandidate" used to indicate the possibility
188 to replace the final value of an iv by direct
189 computation of the value. */
190 unsigned cost; /* Cost of the candidate. */
193 /* The data used by the induction variable optimizations. */
195 struct ivopts_data
197 /* The currently optimized loop. */
198 struct loop *current_loop;
200 /* Numbers of iterations for all exits of the current loop. */
201 htab_t niters;
203 /* The size of version_info array allocated. */
204 unsigned version_info_size;
206 /* The array of information for the ssa names. */
207 struct version_info *version_info;
209 /* The bitmap of indices in version_info whose value was changed. */
210 bitmap relevant;
212 /* The maximum invariant id. */
213 unsigned max_inv_id;
215 /* The uses of induction variables. */
216 varray_type iv_uses;
218 /* The candidates. */
219 varray_type iv_candidates;
221 /* A bitmap of important candidates. */
222 bitmap important_candidates;
224 /* Whether to consider just related and important candidates when replacing a
225 use. */
226 bool consider_all_candidates;
229 /* An assignment of iv candidates to uses. */
231 struct iv_ca
233 /* The number of uses covered by the assignment. */
234 unsigned upto;
236 /* Number of uses that cannot be expressed by the candidates in the set. */
237 unsigned bad_uses;
239 /* Candidate assigned to a use, together with the related costs. */
240 struct cost_pair **cand_for_use;
242 /* Number of times each candidate is used. */
243 unsigned *n_cand_uses;
245 /* The candidates used. */
246 bitmap cands;
248 /* The number of candidates in the set. */
249 unsigned n_cands;
251 /* Total number of registers needed. */
252 unsigned n_regs;
254 /* Total cost of expressing uses. */
255 unsigned cand_use_cost;
257 /* Total cost of candidates. */
258 unsigned cand_cost;
260 /* Number of times each invariant is used. */
261 unsigned *n_invariant_uses;
263 /* Total cost of the assignment. */
264 unsigned cost;
267 /* Difference of two iv candidate assignments. */
269 struct iv_ca_delta
271 /* Changed use. */
272 struct iv_use *use;
274 /* An old assignment (for rollback purposes). */
275 struct cost_pair *old_cp;
277 /* A new assignment. */
278 struct cost_pair *new_cp;
280 /* Next change in the list. */
281 struct iv_ca_delta *next_change;
284 /* Bound on the number of candidates below which all candidates are considered. */
286 #define CONSIDER_ALL_CANDIDATES_BOUND \
287 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
289 /* If there are more iv occurrences than this, we just give up (it is quite unlikely that
290 optimizing such a loop would help, and it would take ages). */
292 #define MAX_CONSIDERED_USES \
293 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
295 /* If there are at most this number of ivs in the set, try removing unnecessary
296 ivs from the set always. */
298 #define ALWAYS_PRUNE_CAND_SET_BOUND \
299 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
301 /* The list of trees for which the decl_rtl field must be reset is stored
302 here. */
304 static varray_type decl_rtl_to_reset;
306 /* Number of uses recorded in DATA. */
308 static inline unsigned
309 n_iv_uses (struct ivopts_data *data)
311 return VARRAY_ACTIVE_SIZE (data->iv_uses);
314 /* Ith use recorded in DATA. */
316 static inline struct iv_use *
317 iv_use (struct ivopts_data *data, unsigned i)
319 return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
322 /* Number of candidates recorded in DATA. */
324 static inline unsigned
325 n_iv_cands (struct ivopts_data *data)
327 return VARRAY_ACTIVE_SIZE (data->iv_candidates);
330 /* Ith candidate recorded in DATA. */
332 static inline struct iv_cand *
333 iv_cand (struct ivopts_data *data, unsigned i)
335 return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
338 /* The data for LOOP. */
340 static inline struct loop_data *
341 loop_data (struct loop *loop)
343 return loop->aux;
346 /* The single loop exit if it dominates the latch, NULL otherwise. */
348 static edge
349 single_dom_exit (struct loop *loop)
351 edge exit = loop->single_exit;
353 if (!exit)
354 return NULL;
356 if (!just_once_each_iteration_p (loop, exit->src))
357 return NULL;
359 return exit;
362 /* Dumps information about the induction variable IV to FILE. */
364 extern void dump_iv (FILE *, struct iv *);
365 void
366 dump_iv (FILE *file, struct iv *iv)
368 if (iv->ssa_name)
370 fprintf (file, "ssa name ");
371 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
372 fprintf (file, "\n");
375 fprintf (file, " type ");
376 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
377 fprintf (file, "\n");
379 if (iv->step)
381 fprintf (file, " base ");
382 print_generic_expr (file, iv->base, TDF_SLIM);
383 fprintf (file, "\n");
385 fprintf (file, " step ");
386 print_generic_expr (file, iv->step, TDF_SLIM);
387 fprintf (file, "\n");
389 else
391 fprintf (file, " invariant ");
392 print_generic_expr (file, iv->base, TDF_SLIM);
393 fprintf (file, "\n");
396 if (iv->base_object)
398 fprintf (file, " base object ");
399 print_generic_expr (file, iv->base_object, TDF_SLIM);
400 fprintf (file, "\n");
403 if (iv->biv_p)
404 fprintf (file, " is a biv\n");
407 /* Dumps information about the USE to FILE. */
409 extern void dump_use (FILE *, struct iv_use *);
410 void
411 dump_use (FILE *file, struct iv_use *use)
413 fprintf (file, "use %d\n", use->id);
415 switch (use->type)
417 case USE_NONLINEAR_EXPR:
418 fprintf (file, " generic\n");
419 break;
421 case USE_OUTER:
422 fprintf (file, " outside\n");
423 break;
425 case USE_ADDRESS:
426 fprintf (file, " address\n");
427 break;
429 case USE_COMPARE:
430 fprintf (file, " compare\n");
431 break;
433 default:
434 gcc_unreachable ();
437 fprintf (file, " in statement ");
438 print_generic_expr (file, use->stmt, TDF_SLIM);
439 fprintf (file, "\n");
441 fprintf (file, " at position ");
442 if (use->op_p)
443 print_generic_expr (file, *use->op_p, TDF_SLIM);
444 fprintf (file, "\n");
446 dump_iv (file, use->iv);
448 if (use->related_cands)
450 fprintf (file, " related candidates ");
451 dump_bitmap (file, use->related_cands);
455 /* Dumps information about the uses to FILE. */
457 extern void dump_uses (FILE *, struct ivopts_data *);
458 void
459 dump_uses (FILE *file, struct ivopts_data *data)
461 unsigned i;
462 struct iv_use *use;
464 for (i = 0; i < n_iv_uses (data); i++)
466 use = iv_use (data, i);
468 dump_use (file, use);
469 fprintf (file, "\n");
473 /* Dumps information about induction variable candidate CAND to FILE. */
475 extern void dump_cand (FILE *, struct iv_cand *);
476 void
477 dump_cand (FILE *file, struct iv_cand *cand)
479 struct iv *iv = cand->iv;
481 fprintf (file, "candidate %d%s\n",
482 cand->id, cand->important ? " (important)" : "");
484 if (!iv)
486 fprintf (file, " final value replacement\n");
487 return;
490 switch (cand->pos)
492 case IP_NORMAL:
493 fprintf (file, " incremented before exit test\n");
494 break;
496 case IP_END:
497 fprintf (file, " incremented at end\n");
498 break;
500 case IP_ORIGINAL:
501 fprintf (file, " original biv\n");
502 break;
505 dump_iv (file, iv);
508 /* Returns the info for ssa version VER. */
510 static inline struct version_info *
511 ver_info (struct ivopts_data *data, unsigned ver)
513 return data->version_info + ver;
516 /* Returns the info for ssa name NAME. */
518 static inline struct version_info *
519 name_info (struct ivopts_data *data, tree name)
521 return ver_info (data, SSA_NAME_VERSION (name));
524 /* Checks whether there exists number X such that X * B = A, counting modulo
525 2^BITS. */
527 static bool
528 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
529 HOST_WIDE_INT *x)
531 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
532 unsigned HOST_WIDE_INT inv, ex, val;
533 unsigned i;
535 a &= mask;
536 b &= mask;
538 /* First divide the whole equation by 2 as long as possible. */
539 while (!(a & 1) && !(b & 1))
541 a >>= 1;
542 b >>= 1;
543 bits--;
544 mask >>= 1;
547 if (!(b & 1))
549 /* If b is still even, a is odd and there is no such x. */
550 return false;
553 /* Find the inverse of b. We compute it as
554 b^(2^(bits - 1) - 1) (mod 2^bits). */
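  /* (Added note: at this point B is odd, and the multiplicative group of
     odd residues modulo 2^BITS has order 2^(BITS - 1), so by Lagrange's
     theorem b^(2^(BITS - 1)) == 1 (mod 2^BITS); hence b^(2^(BITS - 1) - 1)
     is the inverse of b.  The loop below accumulates exactly this power
     by repeated squaring.)  */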
555 inv = 1;
556 ex = b;
557 for (i = 0; i < bits - 1; i++)
559 inv = (inv * ex) & mask;
560 ex = (ex * ex) & mask;
563 val = (a * inv) & mask;
565 gcc_assert (((val * b) & mask) == a);
567 if ((val >> (bits - 1)) & 1)
568 val |= ~mask;
570 *x = val;
572 return true;
575 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
576 emitted in LOOP. */
578 static bool
579 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
581 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
583 gcc_assert (bb);
585 if (sbb == loop->latch)
586 return true;
588 if (sbb != bb)
589 return false;
591 return stmt == last_stmt (bb);
594 /* Returns true if STMT is after the place where the original induction
595 variable CAND is incremented. */
597 static bool
598 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
600 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
601 basic_block stmt_bb = bb_for_stmt (stmt);
602 block_stmt_iterator bsi;
604 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
605 return false;
607 if (stmt_bb != cand_bb)
608 return true;
610 /* Scan the block from the end, since the original ivs are usually
611 incremented at the end of the loop body. */
612 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
614 if (bsi_stmt (bsi) == cand->incremented_at)
615 return false;
616 if (bsi_stmt (bsi) == stmt)
617 return true;
621 /* Returns true if STMT is after the place where the induction variable
622 CAND is incremented in LOOP. */
624 static bool
625 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
627 switch (cand->pos)
629 case IP_END:
630 return false;
632 case IP_NORMAL:
633 return stmt_after_ip_normal_pos (loop, stmt);
635 case IP_ORIGINAL:
636 return stmt_after_ip_original_pos (cand, stmt);
638 default:
639 gcc_unreachable ();
643 /* Element of the table in which we cache the numbers of iterations obtained
644 from exits of the loop. */
646 struct nfe_cache_elt
648 /* The edge for which the number of iterations is cached. */
649 edge exit;
651 /* True if the # of iterations was successfully determined. */
652 bool valid_p;
654 /* Description of # of iterations. */
655 struct tree_niter_desc niter;
658 /* Hash function for nfe_cache_elt E. */
660 static hashval_t
661 nfe_hash (const void *e)
663 const struct nfe_cache_elt *elt = e;
665 return htab_hash_pointer (elt->exit);
668 /* Equality function for nfe_cache_elt E1 and edge E2. */
670 static int
671 nfe_eq (const void *e1, const void *e2)
673 const struct nfe_cache_elt *elt1 = e1;
675 return elt1->exit == e2;
678 /* Returns structure describing number of iterations determined from
679 EXIT of DATA->current_loop, or NULL if something goes wrong. */
681 static struct tree_niter_desc *
682 niter_for_exit (struct ivopts_data *data, edge exit)
684 struct nfe_cache_elt *nfe_desc;
685 PTR *slot;
687 slot = htab_find_slot_with_hash (data->niters, exit,
688 htab_hash_pointer (exit),
689 INSERT);
691 if (!*slot)
693 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
694 nfe_desc->exit = exit;
695 nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
696 exit, &nfe_desc->niter);
697 *slot = nfe_desc;
699 else
700 nfe_desc = *slot;
702 if (!nfe_desc->valid_p)
703 return NULL;
705 return &nfe_desc->niter;
708 /* Returns structure describing number of iterations determined from
709 single dominating exit of DATA->current_loop, or NULL if something
710 goes wrong. */
712 static struct tree_niter_desc *
713 niter_for_single_dom_exit (struct ivopts_data *data)
715 edge exit = single_dom_exit (data->current_loop);
717 if (!exit)
718 return NULL;
720 return niter_for_exit (data, exit);
723 /* Initializes data structures used by the iv optimization pass, stored
724 in DATA. LOOPS is the loop tree. */
726 static void
727 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
729 unsigned i;
731 data->version_info_size = 2 * num_ssa_names;
732 data->version_info = xcalloc (data->version_info_size,
733 sizeof (struct version_info));
734 data->relevant = BITMAP_ALLOC (NULL);
735 data->important_candidates = BITMAP_ALLOC (NULL);
736 data->max_inv_id = 0;
737 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
739 for (i = 1; i < loops->num; i++)
740 if (loops->parray[i])
741 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
743 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
744 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
745 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
748 /* Returns a memory object to which EXPR points. In case we are able to
749 determine that it does not point to any such object, NULL is returned. */
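/* (Illustrative examples, added for exposition: for &a[i].f the returned
   object is &a; for an integer constant cast to a pointer type the result
   is NULL_TREE, since such a value is not known to point to any object.)  */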
751 static tree
752 determine_base_object (tree expr)
754 enum tree_code code = TREE_CODE (expr);
755 tree base, obj, op0, op1;
757 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
758 return NULL_TREE;
760 switch (code)
762 case INTEGER_CST:
763 return NULL_TREE;
765 case ADDR_EXPR:
766 obj = TREE_OPERAND (expr, 0);
767 base = get_base_address (obj);
769 if (!base)
770 return expr;
772 if (TREE_CODE (base) == INDIRECT_REF)
773 return determine_base_object (TREE_OPERAND (base, 0));
775 return fold (build1 (ADDR_EXPR, ptr_type_node, base));
777 case PLUS_EXPR:
778 case MINUS_EXPR:
779 op0 = determine_base_object (TREE_OPERAND (expr, 0));
780 op1 = determine_base_object (TREE_OPERAND (expr, 1));
782 if (!op1)
783 return op0;
785 if (!op0)
786 return (code == PLUS_EXPR
787 ? op1
788 : fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));
790 return fold (build (code, ptr_type_node, op0, op1));
792 case NOP_EXPR:
793 case CONVERT_EXPR:
794 return determine_base_object (TREE_OPERAND (expr, 0));
796 default:
797 return fold_convert (ptr_type_node, expr);
801 /* Allocates an induction variable with given initial value BASE and step STEP
802 for loop LOOP. */
804 static struct iv *
805 alloc_iv (tree base, tree step)
807 struct iv *iv = xcalloc (1, sizeof (struct iv));
809 if (step && integer_zerop (step))
810 step = NULL_TREE;
812 iv->base = base;
813 iv->base_object = determine_base_object (base);
814 iv->step = step;
815 iv->biv_p = false;
816 iv->have_use_for = false;
817 iv->use_id = 0;
818 iv->ssa_name = NULL_TREE;
820 return iv;
823 /* Sets STEP and BASE for induction variable IV. */
825 static void
826 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
828 struct version_info *info = name_info (data, iv);
830 gcc_assert (!info->iv);
832 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
833 info->iv = alloc_iv (base, step);
834 info->iv->ssa_name = iv;
837 /* Finds induction variable declaration for VAR. */
839 static struct iv *
840 get_iv (struct ivopts_data *data, tree var)
842 basic_block bb;
844 if (!name_info (data, var)->iv)
846 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
848 if (!bb
849 || !flow_bb_inside_loop_p (data->current_loop, bb))
850 set_iv (data, var, var, NULL_TREE);
853 return name_info (data, var)->iv;
856 /* Determines the step of a biv defined in PHI. */
858 static tree
859 determine_biv_step (tree phi)
861 struct loop *loop = bb_for_stmt (phi)->loop_father;
862 tree name = PHI_RESULT (phi), base, step;
863 tree type = TREE_TYPE (name);
865 if (!is_gimple_reg (name))
866 return NULL_TREE;
868 if (!simple_iv (loop, phi, name, &base, &step))
869 return NULL_TREE;
871 if (!step)
872 return build_int_cst (type, 0);
874 return step;
877 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
879 static bool
880 abnormal_ssa_name_p (tree exp)
882 if (!exp)
883 return false;
885 if (TREE_CODE (exp) != SSA_NAME)
886 return false;
888 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
891 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
892 abnormal phi node. Callback for for_each_index. */
894 static bool
895 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
896 void *data ATTRIBUTE_UNUSED)
898 if (TREE_CODE (base) == ARRAY_REF)
900 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
901 return false;
902 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
903 return false;
906 return !abnormal_ssa_name_p (*index);
909 /* Returns true if EXPR contains a ssa name that occurs in an
910 abnormal phi node. */
912 static bool
913 contains_abnormal_ssa_name_p (tree expr)
915 enum tree_code code = TREE_CODE (expr);
916 enum tree_code_class class = TREE_CODE_CLASS (code);
918 if (code == SSA_NAME)
919 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
921 if (code == INTEGER_CST
922 || is_gimple_min_invariant (expr))
923 return false;
925 if (code == ADDR_EXPR)
926 return !for_each_index (&TREE_OPERAND (expr, 0),
927 idx_contains_abnormal_ssa_name_p,
928 NULL);
930 switch (class)
932 case tcc_binary:
933 case tcc_comparison:
934 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
935 return true;
937 /* Fallthru. */
938 case tcc_unary:
939 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
940 return true;
942 break;
944 default:
945 gcc_unreachable ();
948 return false;
951 /* Finds basic ivs. */
953 static bool
954 find_bivs (struct ivopts_data *data)
956 tree phi, step, type, base;
957 bool found = false;
958 struct loop *loop = data->current_loop;
960 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
962 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
963 continue;
965 step = determine_biv_step (phi);
967 if (!step)
968 continue;
969 if (cst_and_fits_in_hwi (step)
970 && int_cst_value (step) == 0)
971 continue;
973 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
974 if (contains_abnormal_ssa_name_p (base))
975 continue;
977 type = TREE_TYPE (PHI_RESULT (phi));
978 base = fold_convert (type, base);
979 step = fold_convert (type, step);
981 /* FIXME: We do not handle induction variables whose step does
982 not satisfy cst_and_fits_in_hwi. */
983 if (!cst_and_fits_in_hwi (step))
984 continue;
986 set_iv (data, PHI_RESULT (phi), base, step);
987 found = true;
990 return found;
993 /* Marks basic ivs. */
995 static void
996 mark_bivs (struct ivopts_data *data)
998 tree phi, var;
999 struct iv *iv, *incr_iv;
1000 struct loop *loop = data->current_loop;
1001 basic_block incr_bb;
1003 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1005 iv = get_iv (data, PHI_RESULT (phi));
1006 if (!iv)
1007 continue;
1009 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1010 incr_iv = get_iv (data, var);
1011 if (!incr_iv)
1012 continue;
1014 /* If the increment is in the subloop, ignore it. */
1015 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1016 if (incr_bb->loop_father != data->current_loop
1017 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1018 continue;
1020 iv->biv_p = true;
1021 incr_iv->biv_p = true;
1025 /* Checks whether STMT defines a linear induction variable and stores its
1026 parameters to BASE and STEP. */
1028 static bool
1029 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1030 tree *base, tree *step)
1032 tree lhs;
1033 struct loop *loop = data->current_loop;
1035 *base = NULL_TREE;
1036 *step = NULL_TREE;
1038 if (TREE_CODE (stmt) != MODIFY_EXPR)
1039 return false;
1041 lhs = TREE_OPERAND (stmt, 0);
1042 if (TREE_CODE (lhs) != SSA_NAME)
1043 return false;
1045 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
1046 return false;
1048 /* FIXME: We do not handle induction variables whose step does
1049 not satisfy cst_and_fits_in_hwi. */
1050 if (!zero_p (*step)
1051 && !cst_and_fits_in_hwi (*step))
1052 return false;
1054 if (contains_abnormal_ssa_name_p (*base))
1055 return false;
1057 return true;
1060 /* Finds general ivs in statement STMT. */
1062 static void
1063 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1065 tree base, step;
1067 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1068 return;
1070 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1073 /* Finds general ivs in basic block BB. */
1075 static void
1076 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1078 block_stmt_iterator bsi;
1080 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1081 find_givs_in_stmt (data, bsi_stmt (bsi));
1084 /* Finds general ivs. */
1086 static void
1087 find_givs (struct ivopts_data *data)
1089 struct loop *loop = data->current_loop;
1090 basic_block *body = get_loop_body_in_dom_order (loop);
1091 unsigned i;
1093 for (i = 0; i < loop->num_nodes; i++)
1094 find_givs_in_bb (data, body[i]);
1095 free (body);
1098 /* For each ssa name defined in LOOP determines whether it is an induction
1099 variable and if so, its initial value and step. */
1101 static bool
1102 find_induction_variables (struct ivopts_data *data)
1104 unsigned i;
1105 bitmap_iterator bi;
1107 if (!find_bivs (data))
1108 return false;
1110 find_givs (data);
1111 mark_bivs (data);
1113 if (dump_file && (dump_flags & TDF_DETAILS))
1115 struct tree_niter_desc *niter;
1117 niter = niter_for_single_dom_exit (data);
1119 if (niter)
1121 fprintf (dump_file, " number of iterations ");
1122 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1123 fprintf (dump_file, "\n");
1125 fprintf (dump_file, " may be zero if ");
1126 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1127 fprintf (dump_file, "\n");
1128 fprintf (dump_file, "\n");
1131 fprintf (dump_file, "Induction variables:\n\n");
1133 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1135 if (ver_info (data, i)->iv)
1136 dump_iv (dump_file, ver_info (data, i)->iv);
1140 return true;
1143 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1145 static struct iv_use *
1146 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1147 tree stmt, enum use_type use_type)
1149 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1151 use->id = n_iv_uses (data);
1152 use->type = use_type;
1153 use->iv = iv;
1154 use->stmt = stmt;
1155 use->op_p = use_p;
1156 use->related_cands = BITMAP_ALLOC (NULL);
1158 /* To avoid showing ssa name in the dumps, if it was not reset by the
1159 caller. */
1160 iv->ssa_name = NULL_TREE;
1162 if (dump_file && (dump_flags & TDF_DETAILS))
1163 dump_use (dump_file, use);
1165 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
1167 return use;
1170 /* Checks whether OP is a loop-level invariant and if so, records it.
1171 NONLINEAR_USE is true if the invariant is used in a way we do not
1172 handle specially. */
1174 static void
1175 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1177 basic_block bb;
1178 struct version_info *info;
1180 if (TREE_CODE (op) != SSA_NAME
1181 || !is_gimple_reg (op))
1182 return;
1184 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1185 if (bb
1186 && flow_bb_inside_loop_p (data->current_loop, bb))
1187 return;
1189 info = name_info (data, op);
1190 info->name = op;
1191 info->has_nonlin_use |= nonlinear_use;
1192 if (!info->inv_id)
1193 info->inv_id = ++data->max_inv_id;
1194 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1197 /* Checks whether the use OP is interesting and if so, records it
1198 as TYPE. */
1200 static struct iv_use *
1201 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1202 enum use_type type)
1204 struct iv *iv;
1205 struct iv *civ;
1206 tree stmt;
1207 struct iv_use *use;
1209 if (TREE_CODE (op) != SSA_NAME)
1210 return NULL;
1212 iv = get_iv (data, op);
1213 if (!iv)
1214 return NULL;
1216 if (iv->have_use_for)
1218 use = iv_use (data, iv->use_id);
1220 gcc_assert (use->type == USE_NONLINEAR_EXPR
1221 || use->type == USE_OUTER);
1223 if (type == USE_NONLINEAR_EXPR)
1224 use->type = USE_NONLINEAR_EXPR;
1225 return use;
1228 if (zero_p (iv->step))
1230 record_invariant (data, op, true);
1231 return NULL;
1233 iv->have_use_for = true;
1235 civ = xmalloc (sizeof (struct iv));
1236 *civ = *iv;
1238 stmt = SSA_NAME_DEF_STMT (op);
1239 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1240 || TREE_CODE (stmt) == MODIFY_EXPR);
1242 use = record_use (data, NULL, civ, stmt, type);
1243 iv->use_id = use->id;
1245 return use;
1248 /* Checks whether the use OP is interesting and if so, records it. */
1250 static struct iv_use *
1251 find_interesting_uses_op (struct ivopts_data *data, tree op)
1253 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1256 /* Records a definition of induction variable OP that is used outside of the
1257 loop. */
1259 static struct iv_use *
1260 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1262 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1265 /* Checks whether the condition *COND_P in STMT is interesting
1266 and if so, records it. */
1268 static void
1269 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1271 tree *op0_p;
1272 tree *op1_p;
1273 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1274 struct iv const_iv;
1275 tree zero = integer_zero_node;
1277 const_iv.step = NULL_TREE;
1279 if (integer_zerop (*cond_p)
1280 || integer_nonzerop (*cond_p))
1281 return;
1283 if (TREE_CODE (*cond_p) == SSA_NAME)
1285 op0_p = cond_p;
1286 op1_p = &zero;
1288 else
1290 op0_p = &TREE_OPERAND (*cond_p, 0);
1291 op1_p = &TREE_OPERAND (*cond_p, 1);
1294 if (TREE_CODE (*op0_p) == SSA_NAME)
1295 iv0 = get_iv (data, *op0_p);
1296 else
1297 iv0 = &const_iv;
1299 if (TREE_CODE (*op1_p) == SSA_NAME)
1300 iv1 = get_iv (data, *op1_p);
1301 else
1302 iv1 = &const_iv;
1304 if (/* When comparing with non-invariant value, we may not do any useful
1305 induction variable elimination. */
1306 (!iv0 || !iv1)
1307 /* Eliminating condition based on two ivs would be nontrivial.
1308 ??? TODO -- it is not really important to handle this case. */
1309 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1311 find_interesting_uses_op (data, *op0_p);
1312 find_interesting_uses_op (data, *op1_p);
1313 return;
1316 if (zero_p (iv0->step) && zero_p (iv1->step))
1318 /* If both are invariants, this is a work for unswitching. */
1319 return;
1322 civ = xmalloc (sizeof (struct iv));
1323 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1324 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1327 /* Returns true if expression EXPR is obviously invariant in LOOP,
1328 i.e. if all its operands are defined outside of the LOOP. */
1330 bool
1331 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1333 basic_block def_bb;
1334 unsigned i, len;
1336 if (is_gimple_min_invariant (expr))
1337 return true;
1339 if (TREE_CODE (expr) == SSA_NAME)
1341 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1342 if (def_bb
1343 && flow_bb_inside_loop_p (loop, def_bb))
1344 return false;
1346 return true;
1349 if (!EXPR_P (expr))
1350 return false;
1352 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1353 for (i = 0; i < len; i++)
1354 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1355 return false;
1357 return true;
1360 /* Cumulates the steps of indices into DATA and replaces their values with the
1361 initial ones. Returns false when the value of the index cannot be determined.
1362 Callback for for_each_index. */
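/* (Illustrative example, added for exposition: for the reference a[i],
   where i is an induction variable with base 0 and step 1, the index is
   replaced by 0 and sizeof (a[0]) is added to the step accumulated in
   DATA.)  */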
1364 struct ifs_ivopts_data
1366 struct ivopts_data *ivopts_data;
1367 tree stmt;
1368 tree *step_p;
1371 static bool
1372 idx_find_step (tree base, tree *idx, void *data)
1374 struct ifs_ivopts_data *dta = data;
1375 struct iv *iv;
1376 tree step, type, iv_type, iv_step, lbound, off;
1377 struct loop *loop = dta->ivopts_data->current_loop;
1379 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1380 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1381 return false;
1383 /* If base is a component ref, require that the offset of the reference
1384 be invariant. */
1385 if (TREE_CODE (base) == COMPONENT_REF)
1387 off = component_ref_field_offset (base);
1388 return expr_invariant_in_loop_p (loop, off);
1391 /* If base is array, first check whether we will be able to move the
1392 reference out of the loop (in order to take its address in strength
1393 reduction). In order for this to work we need both lower bound
1394 and step to be loop invariants. */
1395 if (TREE_CODE (base) == ARRAY_REF)
1397 step = array_ref_element_size (base);
1398 lbound = array_ref_low_bound (base);
1400 if (!expr_invariant_in_loop_p (loop, step)
1401 || !expr_invariant_in_loop_p (loop, lbound))
1402 return false;
1405 if (TREE_CODE (*idx) != SSA_NAME)
1406 return true;
1408 iv = get_iv (dta->ivopts_data, *idx);
1409 if (!iv)
1410 return false;
1412 *idx = iv->base;
1414 if (!iv->step)
1415 return true;
1417 iv_type = TREE_TYPE (iv->base);
1418 type = build_pointer_type (TREE_TYPE (base));
1419 if (TREE_CODE (base) == ARRAY_REF)
1421 step = array_ref_element_size (base);
1423 /* We only handle addresses whose step is an integer constant. */
1424 if (TREE_CODE (step) != INTEGER_CST)
1425 return false;
1427 else
1428 /* The step for pointer arithmetics already is 1 byte. */
1429 step = build_int_cst (type, 1);
1431 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1432 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1433 type, iv->base, iv->step, dta->stmt);
1434 else
1435 iv_step = fold_convert (iv_type, iv->step);
1437 if (!iv_step)
1439 /* The index might wrap. */
1440 return false;
1443 step = fold_binary_to_constant (MULT_EXPR, type, step, iv_step);
1445 if (!*dta->step_p)
1446 *dta->step_p = step;
1447 else
1448 *dta->step_p = fold_binary_to_constant (PLUS_EXPR, type,
1449 *dta->step_p, step);
1451 return true;
1454 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1455 object is passed to it in DATA. */
1457 static bool
1458 idx_record_use (tree base, tree *idx,
1459 void *data)
1461 find_interesting_uses_op (data, *idx);
1462 if (TREE_CODE (base) == ARRAY_REF)
1464 find_interesting_uses_op (data, array_ref_element_size (base));
1465 find_interesting_uses_op (data, array_ref_low_bound (base));
1467 return true;
1470 /* Returns true if memory reference REF may be unaligned. */
1472 static bool
1473 may_be_unaligned_p (tree ref)
1475 tree base;
1476 tree base_type;
1477 HOST_WIDE_INT bitsize;
1478 HOST_WIDE_INT bitpos;
1479 tree toffset;
1480 enum machine_mode mode;
1481 int unsignedp, volatilep;
1482 unsigned base_align;
1484 /* The test below is basically copy of what expr.c:normal_inner_ref
1485 does to check whether the object must be loaded by parts when
1486 STRICT_ALIGNMENT is true. */
1487 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1488 &unsignedp, &volatilep, true);
1489 base_type = TREE_TYPE (base);
1490 base_align = TYPE_ALIGN (base_type);
1492 if (mode != BLKmode
1493 && (base_align < GET_MODE_ALIGNMENT (mode)
1494 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1495 || bitpos % BITS_PER_UNIT != 0))
1496 return true;
1498 return false;
1501 /* Finds addresses in *OP_P inside STMT. */
1503 static void
1504 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1506 tree base = unshare_expr (*op_p), step = NULL;
1507 struct iv *civ;
1508 struct ifs_ivopts_data ifs_ivopts_data;
1510 /* Do not play with volatile memory references. A bit too conservative,
1511 perhaps, but safe. */
1512 if (stmt_ann (stmt)->has_volatile_ops)
1513 goto fail;
1515 /* Ignore bitfields for now. Not really something terribly complicated
1516 to handle. TODO. */
1517 if (TREE_CODE (base) == COMPONENT_REF
1518 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1519 goto fail;
1521 if (STRICT_ALIGNMENT
1522 && may_be_unaligned_p (base))
1523 goto fail;
1525 ifs_ivopts_data.ivopts_data = data;
1526 ifs_ivopts_data.stmt = stmt;
1527 ifs_ivopts_data.step_p = &step;
1528 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1529 || zero_p (step))
1530 goto fail;
1532 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1533 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1535 if (TREE_CODE (base) == INDIRECT_REF)
1536 base = TREE_OPERAND (base, 0);
1537 else
1538 base = build_addr (base);
1540 civ = alloc_iv (base, step);
1541 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1542 return;
1544 fail:
1545 for_each_index (op_p, idx_record_use, data);
1548 /* Finds and records invariants used in STMT. */
1550 static void
1551 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1553 use_optype uses = NULL;
1554 unsigned i, n;
1555 tree op;
1557 if (TREE_CODE (stmt) == PHI_NODE)
1558 n = PHI_NUM_ARGS (stmt);
1559 else
1561 get_stmt_operands (stmt);
1562 uses = STMT_USE_OPS (stmt);
1563 n = NUM_USES (uses);
1566 for (i = 0; i < n; i++)
1568 if (TREE_CODE (stmt) == PHI_NODE)
1569 op = PHI_ARG_DEF (stmt, i);
1570 else
1571 op = USE_OP (uses, i);
1573 record_invariant (data, op, false);
1577 /* Finds interesting uses of induction variables in the statement STMT. */
1579 static void
1580 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1582 struct iv *iv;
1583 tree op, lhs, rhs;
1584 use_optype uses = NULL;
1585 unsigned i, n;
1587 find_invariants_stmt (data, stmt);
1589 if (TREE_CODE (stmt) == COND_EXPR)
1591 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1592 return;
1595 if (TREE_CODE (stmt) == MODIFY_EXPR)
1597 lhs = TREE_OPERAND (stmt, 0);
1598 rhs = TREE_OPERAND (stmt, 1);
1600 if (TREE_CODE (lhs) == SSA_NAME)
1602 /* If the statement defines an induction variable, the uses are not
1603 interesting by themselves. */
1605 iv = get_iv (data, lhs);
1607 if (iv && !zero_p (iv->step))
1608 return;
1611 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1613 case tcc_comparison:
1614 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1615 return;
1617 case tcc_reference:
1618 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1619 if (REFERENCE_CLASS_P (lhs))
1620 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1621 return;
1623 default: ;
1626 if (REFERENCE_CLASS_P (lhs)
1627 && is_gimple_val (rhs))
1629 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1630 find_interesting_uses_op (data, rhs);
1631 return;
1634 /* TODO -- we should also handle address uses of type
1636 memory = call (whatever);
1640 call (memory). */
1643 if (TREE_CODE (stmt) == PHI_NODE
1644 && bb_for_stmt (stmt) == data->current_loop->header)
1646 lhs = PHI_RESULT (stmt);
1647 iv = get_iv (data, lhs);
1649 if (iv && !zero_p (iv->step))
1650 return;
1653 if (TREE_CODE (stmt) == PHI_NODE)
1654 n = PHI_NUM_ARGS (stmt);
1655 else
1657 uses = STMT_USE_OPS (stmt);
1658 n = NUM_USES (uses);
1661 for (i = 0; i < n; i++)
1663 if (TREE_CODE (stmt) == PHI_NODE)
1664 op = PHI_ARG_DEF (stmt, i);
1665 else
1666 op = USE_OP (uses, i);
1668 if (TREE_CODE (op) != SSA_NAME)
1669 continue;
1671 iv = get_iv (data, op);
1672 if (!iv)
1673 continue;
1675 find_interesting_uses_op (data, op);
1679 /* Finds interesting uses of induction variables outside of loops
1680 on loop exit edge EXIT. */
1682 static void
1683 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1685 tree phi, def;
1687 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1689 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1690 find_interesting_uses_outer (data, def);
1694 /* Finds uses of the induction variables that are interesting. */
1696 static void
1697 find_interesting_uses (struct ivopts_data *data)
1699 basic_block bb;
1700 block_stmt_iterator bsi;
1701 tree phi;
1702 basic_block *body = get_loop_body (data->current_loop);
1703 unsigned i;
1704 struct version_info *info;
1705 edge e;
1707 if (dump_file && (dump_flags & TDF_DETAILS))
1708 fprintf (dump_file, "Uses:\n\n");
1710 for (i = 0; i < data->current_loop->num_nodes; i++)
1712 edge_iterator ei;
1713 bb = body[i];
1715 FOR_EACH_EDGE (e, ei, bb->succs)
1716 if (e->dest != EXIT_BLOCK_PTR
1717 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1718 find_interesting_uses_outside (data, e);
1720 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1721 find_interesting_uses_stmt (data, phi);
1722 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1723 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1726 if (dump_file && (dump_flags & TDF_DETAILS))
1728 bitmap_iterator bi;
1730 fprintf (dump_file, "\n");
1732 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1734 info = ver_info (data, i);
1735 if (info->inv_id)
1737 fprintf (dump_file, " ");
1738 print_generic_expr (dump_file, info->name, TDF_SLIM);
1739 fprintf (dump_file, " is invariant (%d)%s\n",
1740 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1744 fprintf (dump_file, "\n");
1747 free (body);
1750 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1751 is true, assume we are inside an address. */
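/* (Illustrative examples, added for exposition: for p + 4 this returns p
   and stores 4 to *OFFSET; for &a[i + 7] it returns &a[i] and stores
   7 * sizeof (a[0]) to *OFFSET.)  */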
1753 static tree
1754 strip_offset (tree expr, bool inside_addr, unsigned HOST_WIDE_INT *offset)
1756 tree op0 = NULL_TREE, op1 = NULL_TREE, step;
1757 enum tree_code code;
1758 tree type, orig_type = TREE_TYPE (expr);
1759 unsigned HOST_WIDE_INT off0, off1, st;
1760 tree orig_expr = expr;
1762 STRIP_NOPS (expr);
1763 type = TREE_TYPE (expr);
1764 code = TREE_CODE (expr);
1765 *offset = 0;
1767 switch (code)
1769 case INTEGER_CST:
1770 if (!cst_and_fits_in_hwi (expr)
1771 || zero_p (expr))
1772 return orig_expr;
1774 *offset = int_cst_value (expr);
1775 return build_int_cst_type (orig_type, 0);
1777 case PLUS_EXPR:
1778 case MINUS_EXPR:
1779 op0 = TREE_OPERAND (expr, 0);
1780 op1 = TREE_OPERAND (expr, 1);
1782 op0 = strip_offset (op0, false, &off0);
1783 op1 = strip_offset (op1, false, &off1);
1785 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1786 if (op0 == TREE_OPERAND (expr, 0)
1787 && op1 == TREE_OPERAND (expr, 1))
1788 return orig_expr;
1790 if (zero_p (op1))
1791 expr = op0;
1792 else if (zero_p (op0))
1794 if (code == PLUS_EXPR)
1795 expr = op1;
1796 else
1797 expr = build1 (NEGATE_EXPR, type, op1);
1799 else
1800 expr = build2 (code, type, op0, op1);
1802 return fold_convert (orig_type, expr);
1804 case ARRAY_REF:
1805 if (!inside_addr)
1806 return orig_expr;
1808 step = array_ref_element_size (expr);
1809 if (!cst_and_fits_in_hwi (step))
1810 break;
1812 st = int_cst_value (step);
1813 op1 = TREE_OPERAND (expr, 1);
1814 op1 = strip_offset (op1, false, &off1);
1815 *offset = off1 * st;
1816 break;
1818 case COMPONENT_REF:
1819 if (!inside_addr)
1820 return orig_expr;
1821 break;
1823 case ADDR_EXPR:
1824 inside_addr = true;
1825 break;
1827 default:
1828 return orig_expr;
1831 /* Default handling of expressions for which we want to recurse into
1832 the first operand. */
1833 op0 = TREE_OPERAND (expr, 0);
1834 op0 = strip_offset (op0, inside_addr, &off0);
1835 *offset += off0;
1837 if (op0 == TREE_OPERAND (expr, 0)
1838 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1839 return orig_expr;
1841 expr = copy_node (expr);
1842 TREE_OPERAND (expr, 0) = op0;
1843 if (op1)
1844 TREE_OPERAND (expr, 1) = op1;
1846 return fold_convert (orig_type, expr);
1849 /* Returns variant of TYPE that can be used as base for different uses.
1850 For integer types, we return unsigned variant of the type, which
1851 avoids problems with overflows. For pointer types, we return void *. */
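/* (Added example: for int this returns unsigned int, and for int * it
   returns void *.)  */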
1853 static tree
1854 generic_type_for (tree type)
1856 if (POINTER_TYPE_P (type))
1857 return ptr_type_node;
1859 if (TYPE_UNSIGNED (type))
1860 return type;
1862 return unsigned_type_for (type);
1865 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1866 position to POS. If USE is not NULL, the candidate is set as related to
1867 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1868 replacement of the final value of the iv by a direct computation. */
1870 static struct iv_cand *
1871 add_candidate_1 (struct ivopts_data *data,
1872 tree base, tree step, bool important, enum iv_position pos,
1873 struct iv_use *use, tree incremented_at)
1875 unsigned i;
1876 struct iv_cand *cand = NULL;
1877 tree type, orig_type;
1879 if (base)
1881 orig_type = TREE_TYPE (base);
1882 type = generic_type_for (orig_type);
1883 if (type != orig_type)
1885 base = fold_convert (type, base);
1886 if (step)
1887 step = fold_convert (type, step);
1891 for (i = 0; i < n_iv_cands (data); i++)
1893 cand = iv_cand (data, i);
1895 if (cand->pos != pos)
1896 continue;
1898 if (cand->incremented_at != incremented_at)
1899 continue;
1901 if (!cand->iv)
1903 if (!base && !step)
1904 break;
1906 continue;
1909 if (!base && !step)
1910 continue;
1912 if (!operand_equal_p (base, cand->iv->base, 0))
1913 continue;
1915 if (zero_p (cand->iv->step))
1917 if (zero_p (step))
1918 break;
1920 else
1922 if (step && operand_equal_p (step, cand->iv->step, 0))
1923 break;
1927 if (i == n_iv_cands (data))
1929 cand = xcalloc (1, sizeof (struct iv_cand));
1930 cand->id = i;
1932 if (!base && !step)
1933 cand->iv = NULL;
1934 else
1935 cand->iv = alloc_iv (base, step);
1937 cand->pos = pos;
1938 if (pos != IP_ORIGINAL && cand->iv)
1940 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1941 cand->var_after = cand->var_before;
1943 cand->important = important;
1944 cand->incremented_at = incremented_at;
1945 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1947 if (dump_file && (dump_flags & TDF_DETAILS))
1948 dump_cand (dump_file, cand);
1951 if (important && !cand->important)
1953 cand->important = true;
1954 if (dump_file && (dump_flags & TDF_DETAILS))
1955 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1958 if (use)
1960 bitmap_set_bit (use->related_cands, i);
1961 if (dump_file && (dump_flags & TDF_DETAILS))
1962 fprintf (dump_file, "Candidate %d is related to use %d\n",
1963 cand->id, use->id);
1966 return cand;
1969 /* Returns true if incrementing the induction variable at the end of the LOOP
1970 is allowed.
1972 The purpose is to avoid splitting latch edge with a biv increment, thus
1973 creating a jump, possibly confusing other optimization passes and leaving
1974 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
1975 is not available (so we do not have a better alternative), or if the latch
1976 edge is already nonempty. */
1978 static bool
1979 allow_ip_end_pos_p (struct loop *loop)
1981 if (!ip_normal_pos (loop))
1982 return true;
1984 if (!empty_block_p (ip_end_pos (loop)))
1985 return true;
1987 return false;
1990 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1991 position to POS. If USE is not NULL, the candidate is set as related to
1992 it. The candidate computation is scheduled on all available positions. */
1994 static void
1995 add_candidate (struct ivopts_data *data,
1996 tree base, tree step, bool important, struct iv_use *use)
1998 if (ip_normal_pos (data->current_loop))
1999 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2000 if (ip_end_pos (data->current_loop)
2001 && allow_ip_end_pos_p (data->current_loop))
2002 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2005 /* Add a standard "0 + 1 * iteration" iv candidate for a
2006 type with SIZE bits. */
2008 static void
2009 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2010 unsigned int size)
2012 tree type = lang_hooks.types.type_for_size (size, true);
2013 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2014 true, NULL);
2017 /* Adds standard iv candidates. */
2019 static void
2020 add_standard_iv_candidates (struct ivopts_data *data)
2022 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2024 /* The same for a double-integer type if it is still fast enough. */
2025 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2026 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2030 /* Adds candidates based on the old induction variable IV. */
2032 static void
2033 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2035 tree phi, def;
2036 struct iv_cand *cand;
2038 add_candidate (data, iv->base, iv->step, true, NULL);
2040 /* The same, but with initial value zero. */
2041 add_candidate (data,
2042 build_int_cst (TREE_TYPE (iv->base), 0),
2043 iv->step, true, NULL);
2045 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2046 if (TREE_CODE (phi) == PHI_NODE)
2048 /* Additionally record the possibility of leaving the original iv
2049 untouched. */
2050 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2051 cand = add_candidate_1 (data,
2052 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2053 SSA_NAME_DEF_STMT (def));
2054 cand->var_before = iv->ssa_name;
2055 cand->var_after = def;
2059 /* Adds candidates based on the old induction variables. */
2061 static void
2062 add_old_ivs_candidates (struct ivopts_data *data)
2064 unsigned i;
2065 struct iv *iv;
2066 bitmap_iterator bi;
2068 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2070 iv = ver_info (data, i)->iv;
2071 if (iv && iv->biv_p && !zero_p (iv->step))
2072 add_old_iv_candidates (data, iv);
2076 /* Adds candidates based on the value of the induction variable IV and USE. */
2078 static void
2079 add_iv_value_candidates (struct ivopts_data *data,
2080 struct iv *iv, struct iv_use *use)
2082 add_candidate (data, iv->base, iv->step, false, use);
2084 /* The same, but with initial value zero. */
2085 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2086 iv->step, false, use);
2089 /* Adds candidates based on the address IV and USE. */
2091 static void
2092 add_address_candidates (struct ivopts_data *data,
2093 struct iv *iv, struct iv_use *use)
2095 tree base, abase;
2096 unsigned HOST_WIDE_INT offset;
2098 /* First, the trivial choices. */
2099 add_iv_value_candidates (data, iv, use);
2101 /* Second, try removing the COMPONENT_REFs. */
2102 if (TREE_CODE (iv->base) == ADDR_EXPR)
2104 base = TREE_OPERAND (iv->base, 0);
2105 while (TREE_CODE (base) == COMPONENT_REF
2106 || (TREE_CODE (base) == ARRAY_REF
2107 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
2108 base = TREE_OPERAND (base, 0);
2110 if (base != TREE_OPERAND (iv->base, 0))
2112 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
2113 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
2115 if (TREE_CODE (base) == INDIRECT_REF)
2116 base = TREE_OPERAND (base, 0);
2117 else
2118 base = build_addr (base);
2119 add_candidate (data, base, iv->step, false, use);
2123 /* Third, try removing the constant offset. */
2124 abase = iv->base;
2125 base = strip_offset (abase, false, &offset);
2126 if (offset)
2127 add_candidate (data, base, iv->step, false, use);
2130 /* Possibly adds pseudocandidate for replacing the final value of USE by
2131 a direct computation. */
2133 static void
2134 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2136 struct tree_niter_desc *niter;
2138 /* We must know where we exit the loop and how many times it rolls. */
2139 niter = niter_for_single_dom_exit (data);
2140 if (!niter
2141 || !zero_p (niter->may_be_zero))
2142 return;
2144 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2147 /* Adds candidates based on the uses. */
2149 static void
2150 add_derived_ivs_candidates (struct ivopts_data *data)
2152 unsigned i;
2154 for (i = 0; i < n_iv_uses (data); i++)
2156 struct iv_use *use = iv_use (data, i);
2158 if (!use)
2159 continue;
2161 switch (use->type)
2163 case USE_NONLINEAR_EXPR:
2164 case USE_COMPARE:
2165 /* Just add the ivs based on the value of the iv used here. */
2166 add_iv_value_candidates (data, use->iv, use);
2167 break;
2169 case USE_OUTER:
2170 add_iv_value_candidates (data, use->iv, use);
2172 /* Additionally, add the pseudocandidate for the possibility to
2173 replace the final value by a direct computation. */
2174 add_iv_outer_candidates (data, use);
2175 break;
2177 case USE_ADDRESS:
2178 add_address_candidates (data, use->iv, use);
2179 break;
2181 default:
2182 gcc_unreachable ();
2187 /* Record important candidates and add them to related_cands bitmaps
2188 if needed. */
2190 static void
2191 record_important_candidates (struct ivopts_data *data)
2193 unsigned i;
2194 struct iv_use *use;
2196 for (i = 0; i < n_iv_cands (data); i++)
2198 struct iv_cand *cand = iv_cand (data, i);
2200 if (cand->important)
2201 bitmap_set_bit (data->important_candidates, i);
2204 data->consider_all_candidates = (n_iv_cands (data)
2205 <= CONSIDER_ALL_CANDIDATES_BOUND);
2207 if (data->consider_all_candidates)
2209 /* We will not need "related_cands" bitmaps in this case,
2210 so release them to decrease peak memory consumption. */
2211 for (i = 0; i < n_iv_uses (data); i++)
2213 use = iv_use (data, i);
2214 BITMAP_FREE (use->related_cands);
2217 else
2219 /* Add important candidates to the related_cands bitmaps. */
2220 for (i = 0; i < n_iv_uses (data); i++)
2221 bitmap_ior_into (iv_use (data, i)->related_cands,
2222 data->important_candidates);
2226 /* Finds the candidates for the induction variables. */
2228 static void
2229 find_iv_candidates (struct ivopts_data *data)
2231 /* Add commonly used ivs. */
2232 add_standard_iv_candidates (data);
2234 /* Add old induction variables. */
2235 add_old_ivs_candidates (data);
2237 /* Add induction variables derived from uses. */
2238 add_derived_ivs_candidates (data);
2240 /* Record the important candidates. */
2241 record_important_candidates (data);
2244 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2245 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2246 we allocate a simple list to every use. */
2248 static void
2249 alloc_use_cost_map (struct ivopts_data *data)
2251 unsigned i, size, s, j;
2253 for (i = 0; i < n_iv_uses (data); i++)
2255 struct iv_use *use = iv_use (data, i);
2256 bitmap_iterator bi;
2258 if (data->consider_all_candidates)
2259 size = n_iv_cands (data);
2260 else
2262 s = 0;
2263 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2265 s++;
2268 /* Round up to a power of two, so that taking the modulus by it is fast. */
2269 for (size = 1; size < s; size <<= 1)
2270 continue;
2273 use->n_map_members = size;
2274 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2278 /* Sets cost of the (USE, CANDIDATE) pair to COST and records that it depends
2279 on invariants DEPENDS_ON. */
2281 static void
2282 set_use_iv_cost (struct ivopts_data *data,
2283 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2284 bitmap depends_on)
2286 unsigned i, s;
2288 if (cost == INFTY)
2290 BITMAP_FREE (depends_on);
2291 return;
2294 if (data->consider_all_candidates)
2296 use->cost_map[cand->id].cand = cand;
2297 use->cost_map[cand->id].cost = cost;
2298 use->cost_map[cand->id].depends_on = depends_on;
2299 return;
2302 /* n_map_members is a power of two, so this computes modulo. */
2303 s = cand->id & (use->n_map_members - 1);
2304 for (i = s; i < use->n_map_members; i++)
2305 if (!use->cost_map[i].cand)
2306 goto found;
2307 for (i = 0; i < s; i++)
2308 if (!use->cost_map[i].cand)
2309 goto found;
2311 gcc_unreachable ();
2313 found:
2314 use->cost_map[i].cand = cand;
2315 use->cost_map[i].cost = cost;
2316 use->cost_map[i].depends_on = depends_on;
2319 /* Gets cost of (USE, CANDIDATE) pair. */
2321 static struct cost_pair *
2322 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2323 struct iv_cand *cand)
2325 unsigned i, s;
2326 struct cost_pair *ret;
2328 if (!cand)
2329 return NULL;
2331 if (data->consider_all_candidates)
2333 ret = use->cost_map + cand->id;
2334 if (!ret->cand)
2335 return NULL;
2337 return ret;
2340 /* n_map_members is a power of two, so this computes modulo. */
2341 s = cand->id & (use->n_map_members - 1);
2342 for (i = s; i < use->n_map_members; i++)
2343 if (use->cost_map[i].cand == cand)
2344 return use->cost_map + i;
2346 for (i = 0; i < s; i++)
2347 if (use->cost_map[i].cand == cand)
2348 return use->cost_map + i;
2350 return NULL;
2353 /* Returns an estimate of the cost of computing SEQ. */
2355 static unsigned
2356 seq_cost (rtx seq)
2358 unsigned cost = 0;
2359 rtx set;
2361 for (; seq; seq = NEXT_INSN (seq))
2363 set = single_set (seq);
2364 if (set)
2365 cost += rtx_cost (set, SET);
2366 else
2367 cost++;
2370 return cost;
2373 /* Produces DECL_RTL for object OBJ so that it looks as if it is stored in memory. */
2374 static rtx
2375 produce_memory_decl_rtl (tree obj, int *regno)
2377 rtx x;
2378 gcc_assert (obj);
2380 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2382 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2383 x = gen_rtx_SYMBOL_REF (Pmode, name);
2385 else
2386 x = gen_raw_REG (Pmode, (*regno)++);
2388 return gen_rtx_MEM (DECL_MODE (obj), x);
2391 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2392 walk_tree. DATA contains the actual fake register number. */
2394 static tree
2395 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2397 tree obj = NULL_TREE;
2398 rtx x = NULL_RTX;
2399 int *regno = data;
2401 switch (TREE_CODE (*expr_p))
2403 case ADDR_EXPR:
2404 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2405 handled_component_p (*expr_p);
2406 expr_p = &TREE_OPERAND (*expr_p, 0))
2407 continue;
2408 obj = *expr_p;
2409 if (DECL_P (obj))
2410 x = produce_memory_decl_rtl (obj, regno);
2411 break;
2413 case SSA_NAME:
2414 *ws = 0;
2415 obj = SSA_NAME_VAR (*expr_p);
2416 if (!DECL_RTL_SET_P (obj))
2417 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2418 break;
2420 case VAR_DECL:
2421 case PARM_DECL:
2422 case RESULT_DECL:
2423 *ws = 0;
2424 obj = *expr_p;
2426 if (DECL_RTL_SET_P (obj))
2427 break;
2429 if (DECL_MODE (obj) == BLKmode)
2430 x = produce_memory_decl_rtl (obj, regno);
2431 else
2432 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2434 break;
2436 default:
2437 break;
2440 if (x)
2442 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
2443 SET_DECL_RTL (obj, x);
2446 return NULL_TREE;
2449 /* Determines cost of the computation of EXPR. */
2451 static unsigned
2452 computation_cost (tree expr)
2454 rtx seq, rslt;
2455 tree type = TREE_TYPE (expr);
2456 unsigned cost;
2457 /* Avoid using hard regs in ways which may be unsupported. */
2458 int regno = LAST_VIRTUAL_REGISTER + 1;
2460 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2461 start_sequence ();
2462 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2463 seq = get_insns ();
2464 end_sequence ();
2466 cost = seq_cost (seq);
2467 if (GET_CODE (rslt) == MEM)
2468 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2470 return cost;
2473 /* Returns variable containing the value of candidate CAND at statement AT. */
2475 static tree
2476 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2478 if (stmt_after_increment (loop, cand, stmt))
2479 return cand->var_after;
2480 else
2481 return cand->var_before;
2484 /* Determines the expression by which USE is expressed from induction variable
2485 CAND at statement AT in LOOP. */
2487 static tree
2488 get_computation_at (struct loop *loop,
2489 struct iv_use *use, struct iv_cand *cand, tree at)
2491 tree ubase = use->iv->base;
2492 tree ustep = use->iv->step;
2493 tree cbase = cand->iv->base;
2494 tree cstep = cand->iv->step;
2495 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2496 tree uutype;
2497 tree expr, delta;
2498 tree ratio;
2499 unsigned HOST_WIDE_INT ustepi, cstepi;
2500 HOST_WIDE_INT ratioi;
2502 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2504 /* We do not have enough precision to express the values of the use. */
2505 return NULL_TREE;
2508 expr = var_at_stmt (loop, cand, at);
2510 if (TREE_TYPE (expr) != ctype)
2512 /* This may happen with the original ivs. */
2513 expr = fold_convert (ctype, expr);
2516 if (TYPE_UNSIGNED (utype))
2517 uutype = utype;
2518 else
2520 uutype = unsigned_type_for (utype);
2521 ubase = fold_convert (uutype, ubase);
2522 ustep = fold_convert (uutype, ustep);
2525 if (uutype != ctype)
2527 expr = fold_convert (uutype, expr);
2528 cbase = fold_convert (uutype, cbase);
2529 cstep = fold_convert (uutype, cstep);
2532 if (!cst_and_fits_in_hwi (cstep)
2533 || !cst_and_fits_in_hwi (ustep))
2534 return NULL_TREE;
2536 ustepi = int_cst_value (ustep);
2537 cstepi = int_cst_value (cstep);
2539 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2541 /* TODO: maybe consider the case when ustep divides cstep and the ratio is
2542 a power of 2 (so that the division is fast to execute)? We would
2543 need to be much more careful with overflows etc. then. */
2544 return NULL_TREE;
2547 /* We may need to shift the value if we are after the increment. */
2548 if (stmt_after_increment (loop, cand, at))
2549 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2551 /* use = ubase - ratio * cbase + ratio * var.
2553 In the general case ubase + ratio * (var - cbase) could be better (one less
2554 multiplication), but it is often possible to eliminate redundant parts
2555 of the computation from the (ubase - ratio * cbase) term, and if that does
2556 not happen, fold is able to apply the distributive law to obtain this form
2557 anyway. */
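/* Worked example (hypothetical values): for a use with base 16 and step 8
based on a candidate with base 0 and step 2, ratio = 8 / 2 = 4 and the use
becomes (16 - 4 * 0) + 4 * var = 16 + 4 * var, so only a single multiply
and add remain after folding. */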
2559 if (ratioi == 1)
2561 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2562 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2564 else if (ratioi == -1)
2566 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2567 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2569 else
2571 ratio = build_int_cst_type (uutype, ratioi);
2572 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2573 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2574 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2575 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2578 return fold_convert (utype, expr);
2581 /* Determines the expression by which USE is expressed from induction variable
2582 CAND in LOOP. */
2584 static tree
2585 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2587 return get_computation_at (loop, use, cand, use->stmt);
2590 /* Returns cost of addition in MODE. */
2592 static unsigned
2593 add_cost (enum machine_mode mode)
2595 static unsigned costs[NUM_MACHINE_MODES];
2596 rtx seq;
2597 unsigned cost;
2599 if (costs[mode])
2600 return costs[mode];
2602 start_sequence ();
2603 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2604 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2605 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2606 NULL_RTX);
2607 seq = get_insns ();
2608 end_sequence ();
2610 cost = seq_cost (seq);
2611 if (!cost)
2612 cost = 1;
2614 costs[mode] = cost;
2616 if (dump_file && (dump_flags & TDF_DETAILS))
2617 fprintf (dump_file, "Addition in %s costs %d\n",
2618 GET_MODE_NAME (mode), cost);
2619 return cost;
2622 /* Entry in a hashtable of already known costs for multiplication. */
2623 struct mbc_entry
2625 HOST_WIDE_INT cst; /* The constant to multiply by. */
2626 enum machine_mode mode; /* In mode. */
2627 unsigned cost; /* The cost. */
2630 /* Computes the hash value for ENTRY. */
2632 static hashval_t
2633 mbc_entry_hash (const void *entry)
2635 const struct mbc_entry *e = entry;
2637 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2640 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2642 static int
2643 mbc_entry_eq (const void *entry1, const void *entry2)
2645 const struct mbc_entry *e1 = entry1;
2646 const struct mbc_entry *e2 = entry2;
2648 return (e1->mode == e2->mode
2649 && e1->cst == e2->cst);
2652 /* Returns cost of multiplication by constant CST in MODE. */
2654 static unsigned
2655 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2657 static htab_t costs;
2658 struct mbc_entry **cached, act;
2659 rtx seq;
2660 unsigned cost;
2662 if (!costs)
2663 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2665 act.mode = mode;
2666 act.cst = cst;
2667 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2668 if (*cached)
2669 return (*cached)->cost;
2671 *cached = xmalloc (sizeof (struct mbc_entry));
2672 (*cached)->mode = mode;
2673 (*cached)->cst = cst;
2675 start_sequence ();
2676 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2677 NULL_RTX, 0);
2678 seq = get_insns ();
2679 end_sequence ();
2681 cost = seq_cost (seq);
2683 if (dump_file && (dump_flags & TDF_DETAILS))
2684 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2685 (int) cst, GET_MODE_NAME (mode), cost);
2687 (*cached)->cost = cost;
2689 return cost;
2692 /* Returns the cost of an address of the form symbol + var + OFFSET + RATIO * index.
2693 If SYMBOL_PRESENT is false, the symbol is omitted. If VAR_PRESENT is false,
2694 the variable is omitted. The addresses are created in Pmode.
2696 TODO -- there must be some better way. This all is quite crude. */
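/* For illustration (target-dependent, hypothetical): on a machine with
base + index * scale + displacement addressing, sym + var + 16 + 4 * index
may form a single valid address, so its cost comes directly from
address_cost below; on a target without scaled indexing, the RATIO part is
charged as a separate multiplication instead. */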
2698 static unsigned
2699 get_address_cost (bool symbol_present, bool var_present,
2700 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2702 #define MAX_RATIO 128
2703 static sbitmap valid_mult;
2704 static HOST_WIDE_INT rat, off;
2705 static HOST_WIDE_INT min_offset, max_offset;
2706 static unsigned costs[2][2][2][2];
2707 unsigned cost, acost;
2708 rtx seq, addr, base;
2709 bool offset_p, ratio_p;
2710 rtx reg1;
2711 HOST_WIDE_INT s_offset;
2712 unsigned HOST_WIDE_INT mask;
2713 unsigned bits;
2715 if (!valid_mult)
2717 HOST_WIDE_INT i;
2719 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2721 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2722 for (i = 1; i <= 1 << 20; i <<= 1)
2724 XEXP (addr, 1) = GEN_INT (i);
2725 if (!memory_address_p (Pmode, addr))
2726 break;
2728 max_offset = i >> 1;
2729 off = max_offset;
2731 for (i = 1; i <= 1 << 20; i <<= 1)
2733 XEXP (addr, 1) = GEN_INT (-i);
2734 if (!memory_address_p (Pmode, addr))
2735 break;
2737 min_offset = -(i >> 1);
2739 if (dump_file && (dump_flags & TDF_DETAILS))
2741 fprintf (dump_file, "get_address_cost:\n");
2742 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2743 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2746 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2747 sbitmap_zero (valid_mult);
2748 rat = 1;
2749 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2750 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2752 XEXP (addr, 1) = GEN_INT (i);
2753 if (memory_address_p (Pmode, addr))
2755 SET_BIT (valid_mult, i + MAX_RATIO);
2756 rat = i;
2760 if (dump_file && (dump_flags & TDF_DETAILS))
2762 fprintf (dump_file, " allowed multipliers:");
2763 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2764 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2765 fprintf (dump_file, " %d", (int) i);
2766 fprintf (dump_file, "\n");
2767 fprintf (dump_file, "\n");
2771 bits = GET_MODE_BITSIZE (Pmode);
2772 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2773 offset &= mask;
2774 if ((offset >> (bits - 1) & 1))
2775 offset |= ~mask;
2776 s_offset = offset;
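/* Example of the sign extension above (assuming 32-bit Pmode): an OFFSET of
0xfffffff0 has its top bit set, so after masking and or-ing in ~mask,
s_offset becomes -16 and can be checked against [min_offset, max_offset]. */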
2778 cost = 0;
2779 offset_p = (s_offset != 0
2780 && min_offset <= s_offset && s_offset <= max_offset);
2781 ratio_p = (ratio != 1
2782 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2783 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2785 if (ratio != 1 && !ratio_p)
2786 cost += multiply_by_cost (ratio, Pmode);
2788 if (s_offset && !offset_p && !symbol_present)
2790 cost += add_cost (Pmode);
2791 var_present = true;
2794 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2795 if (!acost)
2797 acost = 0;
2799 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2800 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2801 if (ratio_p)
2802 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2804 if (var_present)
2805 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2807 if (symbol_present)
2809 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2810 if (offset_p)
2811 base = gen_rtx_fmt_e (CONST, Pmode,
2812 gen_rtx_fmt_ee (PLUS, Pmode,
2813 base,
2814 GEN_INT (off)));
2816 else if (offset_p)
2817 base = GEN_INT (off);
2818 else
2819 base = NULL_RTX;
2821 if (base)
2822 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2824 start_sequence ();
2825 addr = memory_address (Pmode, addr);
2826 seq = get_insns ();
2827 end_sequence ();
2829 acost = seq_cost (seq);
2830 acost += address_cost (addr, Pmode);
2832 if (!acost)
2833 acost = 1;
2834 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2837 return cost + acost;
2840 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2841 the bitmap in which we should store them. */
2843 static struct ivopts_data *fd_ivopts_data;
2844 static tree
2845 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2847 bitmap *depends_on = data;
2848 struct version_info *info;
2850 if (TREE_CODE (*expr_p) != SSA_NAME)
2851 return NULL_TREE;
2852 info = name_info (fd_ivopts_data, *expr_p);
2854 if (!info->inv_id || info->has_nonlin_use)
2855 return NULL_TREE;
2857 if (!*depends_on)
2858 *depends_on = BITMAP_ALLOC (NULL);
2859 bitmap_set_bit (*depends_on, info->inv_id);
2861 return NULL_TREE;
2864 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
2865 invariants the computation depends on. */
2867 static unsigned
2868 force_var_cost (struct ivopts_data *data,
2869 tree expr, bitmap *depends_on)
2871 static bool costs_initialized = false;
2872 static unsigned integer_cost;
2873 static unsigned symbol_cost;
2874 static unsigned address_cost;
2875 tree op0, op1;
2876 unsigned cost0, cost1, cost;
2877 enum machine_mode mode;
2879 if (!costs_initialized)
2881 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2882 rtx x = gen_rtx_MEM (DECL_MODE (var),
2883 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2884 tree addr;
2885 tree type = build_pointer_type (integer_type_node);
2887 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2888 2000));
2890 SET_DECL_RTL (var, x);
2891 TREE_STATIC (var) = 1;
2892 addr = build1 (ADDR_EXPR, type, var);
2893 symbol_cost = computation_cost (addr) + 1;
2895 address_cost
2896 = computation_cost (build2 (PLUS_EXPR, type,
2897 addr,
2898 build_int_cst_type (type, 2000))) + 1;
2899 if (dump_file && (dump_flags & TDF_DETAILS))
2901 fprintf (dump_file, "force_var_cost:\n");
2902 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2903 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2904 fprintf (dump_file, " address %d\n", (int) address_cost);
2905 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2906 fprintf (dump_file, "\n");
2909 costs_initialized = true;
2912 STRIP_NOPS (expr);
2914 if (depends_on)
2916 fd_ivopts_data = data;
2917 walk_tree (&expr, find_depends, depends_on, NULL);
2920 if (SSA_VAR_P (expr))
2921 return 0;
2923 if (TREE_INVARIANT (expr))
2925 if (TREE_CODE (expr) == INTEGER_CST)
2926 return integer_cost;
2928 if (TREE_CODE (expr) == ADDR_EXPR)
2930 tree obj = TREE_OPERAND (expr, 0);
2932 if (TREE_CODE (obj) == VAR_DECL
2933 || TREE_CODE (obj) == PARM_DECL
2934 || TREE_CODE (obj) == RESULT_DECL)
2935 return symbol_cost;
2938 return address_cost;
2941 switch (TREE_CODE (expr))
2943 case PLUS_EXPR:
2944 case MINUS_EXPR:
2945 case MULT_EXPR:
2946 op0 = TREE_OPERAND (expr, 0);
2947 op1 = TREE_OPERAND (expr, 1);
2948 STRIP_NOPS (op0);
2949 STRIP_NOPS (op1);
2951 if (is_gimple_val (op0))
2952 cost0 = 0;
2953 else
2954 cost0 = force_var_cost (data, op0, NULL);
2956 if (is_gimple_val (op1))
2957 cost1 = 0;
2958 else
2959 cost1 = force_var_cost (data, op1, NULL);
2961 break;
2963 default:
2964 /* Just an arbitrary value, FIXME. */
2965 return target_spill_cost;
2968 mode = TYPE_MODE (TREE_TYPE (expr));
2969 switch (TREE_CODE (expr))
2971 case PLUS_EXPR:
2972 case MINUS_EXPR:
2973 cost = add_cost (mode);
2974 break;
2976 case MULT_EXPR:
2977 if (cst_and_fits_in_hwi (op0))
2978 cost = multiply_by_cost (int_cst_value (op0), mode);
2979 else if (cst_and_fits_in_hwi (op1))
2980 cost = multiply_by_cost (int_cst_value (op1), mode);
2981 else
2982 return target_spill_cost;
2983 break;
2985 default:
2986 gcc_unreachable ();
2989 cost += cost0;
2990 cost += cost1;
2992 /* Bound the cost by target_spill_cost. The parts of complicated
2993 computations often are either loop invariant or at least can
2994 be shared between several iv uses, so letting this grow without
2995 limits would not give reasonable results. */
2996 return cost < target_spill_cost ? cost : target_spill_cost;
2999 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3000 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3001 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3002 invariants the computation depends on. */
3004 static unsigned
3005 split_address_cost (struct ivopts_data *data,
3006 tree addr, bool *symbol_present, bool *var_present,
3007 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3009 tree core;
3010 HOST_WIDE_INT bitsize;
3011 HOST_WIDE_INT bitpos;
3012 tree toffset;
3013 enum machine_mode mode;
3014 int unsignedp, volatilep;
3016 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3017 &unsignedp, &volatilep, false);
3019 if (toffset != 0
3020 || bitpos % BITS_PER_UNIT != 0
3021 || TREE_CODE (core) != VAR_DECL)
3023 *symbol_present = false;
3024 *var_present = true;
3025 fd_ivopts_data = data;
3026 walk_tree (&addr, find_depends, depends_on, NULL);
3027 return target_spill_cost;
3030 *offset += bitpos / BITS_PER_UNIT;
3031 if (TREE_STATIC (core)
3032 || DECL_EXTERNAL (core))
3034 *symbol_present = true;
3035 *var_present = false;
3036 return 0;
3039 *symbol_present = false;
3040 *var_present = true;
3041 return 0;
3044 /* Estimates cost of expressing difference of addresses E1 - E2 as
3045 var + symbol + offset. The value of offset is added to OFFSET,
3046 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3047 part is missing. DEPENDS_ON is a set of the invariants the computation
3048 depends on. */
3050 static unsigned
3051 ptr_difference_cost (struct ivopts_data *data,
3052 tree e1, tree e2, bool *symbol_present, bool *var_present,
3053 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3055 HOST_WIDE_INT diff = 0;
3056 unsigned cost;
3058 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3060 if (ptr_difference_const (e1, e2, &diff))
3062 *offset += diff;
3063 *symbol_present = false;
3064 *var_present = false;
3065 return 0;
3068 if (e2 == integer_zero_node)
3069 return split_address_cost (data, TREE_OPERAND (e1, 0),
3070 symbol_present, var_present, offset, depends_on);
3072 *symbol_present = false;
3073 *var_present = true;
3075 cost = force_var_cost (data, e1, depends_on);
3076 cost += force_var_cost (data, e2, depends_on);
3077 cost += add_cost (Pmode);
3079 return cost;
3082 /* Estimates cost of expressing difference E1 - E2 as
3083 var + symbol + offset. The value of offset is added to OFFSET,
3084 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3085 part is missing. DEPENDS_ON is a set of the invariants the computation
3086 depends on. */
3088 static unsigned
3089 difference_cost (struct ivopts_data *data,
3090 tree e1, tree e2, bool *symbol_present, bool *var_present,
3091 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3093 unsigned cost;
3094 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3095 unsigned HOST_WIDE_INT off1, off2;
3097 e1 = strip_offset (e1, false, &off1);
3098 e2 = strip_offset (e2, false, &off2);
3099 *offset += off1 - off2;
3101 STRIP_NOPS (e1);
3102 STRIP_NOPS (e2);
3104 if (TREE_CODE (e1) == ADDR_EXPR)
3105 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3106 depends_on);
3107 *symbol_present = false;
3109 if (operand_equal_p (e1, e2, 0))
3111 *var_present = false;
3112 return 0;
3114 *var_present = true;
3115 if (zero_p (e2))
3116 return force_var_cost (data, e1, depends_on);
3118 if (zero_p (e1))
3120 cost = force_var_cost (data, e2, depends_on);
3121 cost += multiply_by_cost (-1, mode);
3123 return cost;
3126 cost = force_var_cost (data, e1, depends_on);
3127 cost += force_var_cost (data, e2, depends_on);
3128 cost += add_cost (mode);
3130 return cost;
3133 /* Determines the cost of the computation by which USE is expressed
3134 from induction variable CAND. If ADDRESS_P is true, we just need
3135 to create an address from it, otherwise we want to get it into a
3136 register. A set of invariants we depend on is stored in
3137 DEPENDS_ON. AT is the statement at which the value is computed. */
3139 static unsigned
3140 get_computation_cost_at (struct ivopts_data *data,
3141 struct iv_use *use, struct iv_cand *cand,
3142 bool address_p, bitmap *depends_on, tree at)
3144 tree ubase = use->iv->base, ustep = use->iv->step;
3145 tree cbase, cstep;
3146 tree utype = TREE_TYPE (ubase), ctype;
3147 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3148 HOST_WIDE_INT ratio, aratio;
3149 bool var_present, symbol_present;
3150 unsigned cost = 0, n_sums;
3152 *depends_on = NULL;
3154 /* Only consider real candidates. */
3155 if (!cand->iv)
3156 return INFTY;
3158 cbase = cand->iv->base;
3159 cstep = cand->iv->step;
3160 ctype = TREE_TYPE (cbase);
3162 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3164 /* We do not have a precision to express the values of use. */
3165 return INFTY;
3168 if (address_p)
3170 /* Do not try to express the address of an object with a computation based
3171 on the address of a different object. This may cause problems in rtl
3172 level alias analysis (which does not expect this to be happening,
3173 as it is illegal in C), and would be unlikely to be useful
3174 anyway. */
3175 if (use->iv->base_object
3176 && cand->iv->base_object
3177 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3178 return INFTY;
3181 if (!cst_and_fits_in_hwi (ustep)
3182 || !cst_and_fits_in_hwi (cstep))
3183 return INFTY;
3185 if (TREE_CODE (ubase) == INTEGER_CST
3186 && !cst_and_fits_in_hwi (ubase))
3187 goto fallback;
3189 if (TREE_CODE (cbase) == INTEGER_CST
3190 && !cst_and_fits_in_hwi (cbase))
3191 goto fallback;
3193 ustepi = int_cst_value (ustep);
3194 cstepi = int_cst_value (cstep);
3196 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3198 /* TODO -- add direct handling of this case. */
3199 goto fallback;
3202 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3203 return INFTY;
3205 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3206 or ratio == 1, it is better to handle this like
3208 ubase - ratio * cbase + ratio * var
3210 (this also holds in the case ratio == -1; TODO). */
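/* For instance (hypothetical values), with cbase = 8 and ratio = 2, the
constant part -ratio * cbase = -16 is folded into OFFSET below, and only
the cost of expressing ubase plus the final multiply and add remains. */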
3212 if (TREE_CODE (cbase) == INTEGER_CST)
3214 offset = - ratio * int_cst_value (cbase);
3215 cost += difference_cost (data,
3216 ubase, integer_zero_node,
3217 &symbol_present, &var_present, &offset,
3218 depends_on);
3220 else if (ratio == 1)
3222 cost += difference_cost (data,
3223 ubase, cbase,
3224 &symbol_present, &var_present, &offset,
3225 depends_on);
3227 else
3229 cost += force_var_cost (data, cbase, depends_on);
3230 cost += add_cost (TYPE_MODE (ctype));
3231 cost += difference_cost (data,
3232 ubase, integer_zero_node,
3233 &symbol_present, &var_present, &offset,
3234 depends_on);
3237 /* If we are after the increment, the value of the candidate has already
3238 advanced by one step. */
3239 if (stmt_after_increment (data->current_loop, cand, at))
3240 offset -= ratio * cstepi;
3242 /* Now the computation is of the form symbol + var1 + const + ratio * var2
3243 (the symbol/var/const parts may be omitted). If we are looking for an address,
3244 find the cost of addressing this. */
3245 if (address_p)
3246 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3248 /* Otherwise estimate the costs for computing the expression. */
3249 aratio = ratio > 0 ? ratio : -ratio;
3250 if (!symbol_present && !var_present && !offset)
3252 if (ratio != 1)
3253 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3255 return cost;
3258 if (aratio != 1)
3259 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3261 n_sums = 1;
3262 if (var_present
3263 /* Symbol + offset should be compile-time computable. */
3264 && (symbol_present || offset))
3265 n_sums++;
3267 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3269 fallback:
3271 /* Just get the expression, expand it and measure the cost. */
3272 tree comp = get_computation_at (data->current_loop, use, cand, at);
3274 if (!comp)
3275 return INFTY;
3277 if (address_p)
3278 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3280 return computation_cost (comp);
3284 /* Determines the cost of the computation by which USE is expressed
3285 from induction variable CAND. If ADDRESS_P is true, we just need
3286 to create an address from it, otherwise we want to get it into a
3287 register. A set of invariants we depend on is stored in
3288 DEPENDS_ON. */
3290 static unsigned
3291 get_computation_cost (struct ivopts_data *data,
3292 struct iv_use *use, struct iv_cand *cand,
3293 bool address_p, bitmap *depends_on)
3295 return get_computation_cost_at (data,
3296 use, cand, address_p, depends_on, use->stmt);
3299 /* Determines cost of basing replacement of USE on CAND in a generic
3300 expression. */
3302 static bool
3303 determine_use_iv_cost_generic (struct ivopts_data *data,
3304 struct iv_use *use, struct iv_cand *cand)
3306 bitmap depends_on;
3307 unsigned cost;
3309 /* The simple case first -- if we need to express the value of the preserved
3310 original biv, the cost is 0. This also prevents us from counting the
3311 cost of the increment twice -- once at this use and once in the cost of
3312 the candidate. */
3313 if (cand->pos == IP_ORIGINAL
3314 && cand->incremented_at == use->stmt)
3316 set_use_iv_cost (data, use, cand, 0, NULL);
3317 return true;
3320 cost = get_computation_cost (data, use, cand, false, &depends_on);
3321 set_use_iv_cost (data, use, cand, cost, depends_on);
3323 return cost != INFTY;
3326 /* Determines cost of basing replacement of USE on CAND in an address. */
3328 static bool
3329 determine_use_iv_cost_address (struct ivopts_data *data,
3330 struct iv_use *use, struct iv_cand *cand)
3332 bitmap depends_on;
3333 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3335 set_use_iv_cost (data, use, cand, cost, depends_on);
3337 return cost != INFTY;
3340 /* Computes value of induction variable IV in iteration NITER. */
3342 static tree
3343 iv_value (struct iv *iv, tree niter)
3345 tree val;
3346 tree type = TREE_TYPE (iv->base);
3348 niter = fold_convert (type, niter);
3349 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3351 return fold (build2 (PLUS_EXPR, type, iv->base, val));
3354 /* Computes value of candidate CAND at position AT in iteration NITER. */
3356 static tree
3357 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3359 tree val = iv_value (cand->iv, niter);
3360 tree type = TREE_TYPE (cand->iv->base);
3362 if (stmt_after_increment (loop, cand, at))
3363 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3365 return val;
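/* For example (hypothetical candidate): with base 0 and step 4, the value in
iteration NITER is 4 * NITER, plus one more step when the position is after
the increment; may_eliminate_iv uses this value as the BOUND that the
eliminated exit test compares against. */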
3368 /* Returns the period of induction variable IV. */
3370 static tree
3371 iv_period (struct iv *iv)
3373 tree step = iv->step, period, type;
3374 tree pow2div;
3376 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3378 /* The period of the iv is gcd (step, type range). Since the type range is
3379 a power of two, it suffices to determine the maximum power of two that
3380 divides step. */
3381 pow2div = num_ending_zeros (step);
3382 type = unsigned_type_for (TREE_TYPE (step));
3384 period = build_low_bits_mask (type,
3385 (TYPE_PRECISION (type)
3386 - tree_low_cst (pow2div, 1)));
3388 return period;
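/* Worked example (hypothetical iv): for a 32-bit unsigned iv with step 4,
num_ending_zeros gives 2, so the period computed above is the mask of the
low 30 bits, i.e. 2^30 - 1; may_eliminate_iv below then allows the
elimination only when the loop runs at most that many iterations. */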
3391 /* Check whether it is possible to express the condition in USE by comparison
3392 of candidate CAND. If so, store the comparison code to COMPARE and the
3393 value compared with to BOUND. */
3395 static bool
3396 may_eliminate_iv (struct ivopts_data *data,
3397 struct iv_use *use, struct iv_cand *cand,
3398 enum tree_code *compare, tree *bound)
3400 basic_block ex_bb;
3401 edge exit;
3402 struct tree_niter_desc *niter;
3403 tree nit, nit_type;
3404 tree wider_type, period, per_type;
3405 struct loop *loop = data->current_loop;
3407 /* For now this works only for exits that dominate the loop latch. TODO --
3408 extend to other conditions inside the loop body. */
3409 ex_bb = bb_for_stmt (use->stmt);
3410 if (use->stmt != last_stmt (ex_bb)
3411 || TREE_CODE (use->stmt) != COND_EXPR)
3412 return false;
3413 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3414 return false;
3416 exit = EDGE_SUCC (ex_bb, 0);
3417 if (flow_bb_inside_loop_p (loop, exit->dest))
3418 exit = EDGE_SUCC (ex_bb, 1);
3419 if (flow_bb_inside_loop_p (loop, exit->dest))
3420 return false;
3422 niter = niter_for_exit (data, exit);
3423 if (!niter
3424 || !zero_p (niter->may_be_zero))
3425 return false;
3427 nit = niter->niter;
3428 nit_type = TREE_TYPE (nit);
3430 /* Determine whether we may use the variable to test whether niter iterations
3431 elapsed. This is the case iff the period of the induction variable is at
3432 least the number of iterations. */
3433 period = iv_period (cand->iv);
3434 if (!period)
3435 return false;
3436 per_type = TREE_TYPE (period);
3438 wider_type = TREE_TYPE (period);
3439 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3440 wider_type = per_type;
3441 else
3442 wider_type = nit_type;
3444 if (!integer_nonzerop (fold (build2 (GE_EXPR, boolean_type_node,
3445 fold_convert (wider_type, period),
3446 fold_convert (wider_type, nit)))))
3447 return false;
3449 if (exit->flags & EDGE_TRUE_VALUE)
3450 *compare = EQ_EXPR;
3451 else
3452 *compare = NE_EXPR;
3454 *bound = cand_value_at (loop, cand, use->stmt, nit);
3455 return true;
3458 /* Determines cost of basing replacement of USE on CAND in a condition. */
3460 static bool
3461 determine_use_iv_cost_condition (struct ivopts_data *data,
3462 struct iv_use *use, struct iv_cand *cand)
3464 tree bound;
3465 enum tree_code compare;
3467 /* Only consider real candidates. */
3468 if (!cand->iv)
3470 set_use_iv_cost (data, use, cand, INFTY, NULL);
3471 return false;
3474 if (may_eliminate_iv (data, use, cand, &compare, &bound))
3476 bitmap depends_on = NULL;
3477 unsigned cost = force_var_cost (data, bound, &depends_on);
3479 set_use_iv_cost (data, use, cand, cost, depends_on);
3480 return cost != INFTY;
3483 /* The induction variable elimination failed; just express the original
3484 giv. If it is compared with an invariant, note that we cannot get
3485 rid of it. */
3486 if (TREE_CODE (*use->op_p) == SSA_NAME)
3487 record_invariant (data, *use->op_p, true);
3488 else
3490 record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
3491 record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
3494 return determine_use_iv_cost_generic (data, use, cand);
3497 /* Checks whether it is possible to replace the final value of USE by
3498 a direct computation. If so, the formula is stored to *VALUE. */
3500 static bool
3501 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
3502 tree *value)
3504 struct loop *loop = data->current_loop;
3505 edge exit;
3506 struct tree_niter_desc *niter;
3508 exit = single_dom_exit (loop);
3509 if (!exit)
3510 return false;
3512 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3513 bb_for_stmt (use->stmt)));
3515 niter = niter_for_single_dom_exit (data);
3516 if (!niter
3517 || !zero_p (niter->may_be_zero))
3518 return false;
3520 *value = iv_value (use->iv, niter->niter);
3522 return true;
3525 /* Determines cost of replacing final value of USE using CAND. */
3527 static bool
3528 determine_use_iv_cost_outer (struct ivopts_data *data,
3529 struct iv_use *use, struct iv_cand *cand)
3531 bitmap depends_on;
3532 unsigned cost;
3533 edge exit;
3534 tree value;
3535 struct loop *loop = data->current_loop;
3537 /* The simple case first -- if we need to express the value of the preserved
3538 original biv, the cost is 0. This also prevents us from counting the
3539 cost of the increment twice -- once at this use and once in the cost of
3540 the candidate. */
3541 if (cand->pos == IP_ORIGINAL
3542 && cand->incremented_at == use->stmt)
3544 set_use_iv_cost (data, use, cand, 0, NULL);
3545 return true;
3548 if (!cand->iv)
3550 if (!may_replace_final_value (data, use, &value))
3552 set_use_iv_cost (data, use, cand, INFTY, NULL);
3553 return false;
3556 depends_on = NULL;
3557 cost = force_var_cost (data, value, &depends_on);
3559 cost /= AVG_LOOP_NITER (loop);
3561 set_use_iv_cost (data, use, cand, cost, depends_on);
3562 return cost != INFTY;
3565 exit = single_dom_exit (loop);
3566 if (exit)
3568 /* If there is just a single exit, we may use the value of the candidate
3569 after the exit is taken to determine the value of the use. */
3570 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3571 last_stmt (exit->src));
3572 if (cost != INFTY)
3573 cost /= AVG_LOOP_NITER (loop);
3575 else
3577 /* Otherwise we just need to compute the iv. */
3578 cost = get_computation_cost (data, use, cand, false, &depends_on);
3581 set_use_iv_cost (data, use, cand, cost, depends_on);
3583 return cost != INFTY;
3586 /* Determines cost of basing replacement of USE on CAND. Returns false
3587 if USE cannot be based on CAND. */
3589 static bool
3590 determine_use_iv_cost (struct ivopts_data *data,
3591 struct iv_use *use, struct iv_cand *cand)
3593 switch (use->type)
3595 case USE_NONLINEAR_EXPR:
3596 return determine_use_iv_cost_generic (data, use, cand);
3598 case USE_OUTER:
3599 return determine_use_iv_cost_outer (data, use, cand);
3601 case USE_ADDRESS:
3602 return determine_use_iv_cost_address (data, use, cand);
3604 case USE_COMPARE:
3605 return determine_use_iv_cost_condition (data, use, cand);
3607 default:
3608 gcc_unreachable ();
3612 /* Determines costs of basing the use of the iv on an iv candidate. */
3614 static void
3615 determine_use_iv_costs (struct ivopts_data *data)
3617 unsigned i, j;
3618 struct iv_use *use;
3619 struct iv_cand *cand;
3620 bitmap to_clear = BITMAP_ALLOC (NULL);
3622 alloc_use_cost_map (data);
3624 for (i = 0; i < n_iv_uses (data); i++)
3626 use = iv_use (data, i);
3628 if (data->consider_all_candidates)
3630 for (j = 0; j < n_iv_cands (data); j++)
3632 cand = iv_cand (data, j);
3633 determine_use_iv_cost (data, use, cand);
3636 else
3638 bitmap_iterator bi;
3640 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3642 cand = iv_cand (data, j);
3643 if (!determine_use_iv_cost (data, use, cand))
3644 bitmap_set_bit (to_clear, j);
3647 /* Remove the candidates for which the cost is infinite from
3648 the list of related candidates. */
3649 bitmap_and_compl_into (use->related_cands, to_clear);
3650 bitmap_clear (to_clear);
3654 BITMAP_FREE (to_clear);
3656 if (dump_file && (dump_flags & TDF_DETAILS))
3658 fprintf (dump_file, "Use-candidate costs:\n");
3660 for (i = 0; i < n_iv_uses (data); i++)
3662 use = iv_use (data, i);
3664 fprintf (dump_file, "Use %d:\n", i);
3665 fprintf (dump_file, " cand\tcost\tdepends on\n");
3666 for (j = 0; j < use->n_map_members; j++)
3668 if (!use->cost_map[j].cand
3669 || use->cost_map[j].cost == INFTY)
3670 continue;
3672 fprintf (dump_file, " %d\t%d\t",
3673 use->cost_map[j].cand->id,
3674 use->cost_map[j].cost);
3675 if (use->cost_map[j].depends_on)
3676 bitmap_print (dump_file,
3677 use->cost_map[j].depends_on, "","");
3678 fprintf (dump_file, "\n");
3681 fprintf (dump_file, "\n");
3683 fprintf (dump_file, "\n");
3687 /* Determines cost of the candidate CAND. */
3689 static void
3690 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3692 unsigned cost_base, cost_step;
3693 tree base;
3695 if (!cand->iv)
3697 cand->cost = 0;
3698 return;
3701 /* There are two costs associated with the candidate -- its increment
3702 and its initialization. The second is almost negligible for any loop
3703 that rolls enough, so we take it into account only very little. */
3705 base = cand->iv->base;
3706 cost_base = force_var_cost (data, base, NULL);
3707 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3709 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3711 /* Prefer the original iv unless we may gain something by replacing it. */
3712 if (cand->pos == IP_ORIGINAL)
3713 cand->cost--;
3715 /* Prefer not to insert statements into the latch unless there are some
3716 there already (so that we do not create unnecessary jumps). */
3717 if (cand->pos == IP_END
3718 && empty_block_p (ip_end_pos (data->current_loop)))
3719 cand->cost++;
3722 /* Determines costs of computation of the candidates. */
3724 static void
3725 determine_iv_costs (struct ivopts_data *data)
3727 unsigned i;
3729 if (dump_file && (dump_flags & TDF_DETAILS))
3731 fprintf (dump_file, "Candidate costs:\n");
3732 fprintf (dump_file, " cand\tcost\n");
3735 for (i = 0; i < n_iv_cands (data); i++)
3737 struct iv_cand *cand = iv_cand (data, i);
3739 determine_iv_cost (data, cand);
3741 if (dump_file && (dump_flags & TDF_DETAILS))
3742 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3745 if (dump_file && (dump_flags & TDF_DETAILS))
3746 fprintf (dump_file, "\n");
3749 /* Calculates cost for having SIZE induction variables. */
3751 static unsigned
3752 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3754 return global_cost_for_size (size,
3755 loop_data (data->current_loop)->regs_used,
3756 n_iv_uses (data));
3759 /* For each size of the induction variable set determine the penalty. */
3761 static void
3762 determine_set_costs (struct ivopts_data *data)
3764 unsigned j, n;
3765 tree phi, op;
3766 struct loop *loop = data->current_loop;
3767 bitmap_iterator bi;
3769 /* We use the following model (definitely improvable, especially the
3770 cost function -- TODO):
3772 We estimate the number of registers available (using MD data), name it A.
3774 We estimate the number of registers used by the loop, name it U. This
3775 number is obtained as the number of loop phi nodes (not counting virtual
3776 registers and bivs) + the number of variables from outside of the loop.
3778 We set a reserve R (free regs that are used for temporary computations,
3779 etc.). For now the reserve is a constant 3.
3781 Let I be the number of induction variables.
3783 -- if U + I + R <= A, the cost is I * SMALL_COST (just so that we are not
3784 encouraged to make a lot of ivs without a reason).
3785 -- if A - R < U + I <= A, the cost is I * PRES_COST
3786 -- if U + I > A, the cost is I * PRES_COST and
3787 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
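/* Worked example (hypothetical numbers): with A = 8 available registers,
R = 3 in reserve and U = 4 registers used by the loop, a set of I = 1 ivs
costs 1 * SMALL_COST (since 4 + 1 + 3 <= 8), a set of I = 3 costs
3 * PRES_COST, and a set of I = 6 additionally pays the spill term because
4 + 6 > 8. */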
3789 if (dump_file && (dump_flags & TDF_DETAILS))
3791 fprintf (dump_file, "Global costs:\n");
3792 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3793 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3794 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3795 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3798 n = 0;
3799 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3801 op = PHI_RESULT (phi);
3803 if (!is_gimple_reg (op))
3804 continue;
3806 if (get_iv (data, op))
3807 continue;
3809 n++;
3812 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3814 struct version_info *info = ver_info (data, j);
3816 if (info->inv_id && info->has_nonlin_use)
3817 n++;
3820 loop_data (loop)->regs_used = n;
3821 if (dump_file && (dump_flags & TDF_DETAILS))
3822 fprintf (dump_file, " regs_used %d\n", n);
3824 if (dump_file && (dump_flags & TDF_DETAILS))
3826 fprintf (dump_file, " cost for size:\n");
3827 fprintf (dump_file, " ivs\tcost\n");
3828 for (j = 0; j <= 2 * target_avail_regs; j++)
3829 fprintf (dump_file, " %d\t%d\n", j,
3830 ivopts_global_cost_for_size (data, j));
3831 fprintf (dump_file, "\n");
3835 /* Returns true if A is a cheaper cost pair than B. */
3837 static bool
3838 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3840 if (!a)
3841 return false;
3843 if (!b)
3844 return true;
3846 if (a->cost < b->cost)
3847 return true;
3849 if (a->cost > b->cost)
3850 return false;
3852 /* In case the costs are the same, prefer the cheaper candidate. */
3853 if (a->cand->cost < b->cand->cost)
3854 return true;
3856 return false;
3859 /* Computes the cost field of IVS structure. */
3861 static void
3862 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
3864 unsigned cost = 0;
3866 cost += ivs->cand_use_cost;
3867 cost += ivs->cand_cost;
3868 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
3870 ivs->cost = cost;
3873 /* Set USE not to be expressed by any candidate in IVS. */
3875 static void
3876 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
3877 struct iv_use *use)
3879 unsigned uid = use->id, cid, iid;
3880 bitmap deps;
3881 struct cost_pair *cp;
3882 bitmap_iterator bi;
3884 cp = ivs->cand_for_use[uid];
3885 if (!cp)
3886 return;
3887 cid = cp->cand->id;
3889 ivs->bad_uses++;
3890 ivs->cand_for_use[uid] = NULL;
3891 ivs->n_cand_uses[cid]--;
3893 if (ivs->n_cand_uses[cid] == 0)
3895 bitmap_clear_bit (ivs->cands, cid);
3896 /* Do not count the pseudocandidates. */
3897 if (cp->cand->iv)
3898 ivs->n_regs--;
3899 ivs->n_cands--;
3900 ivs->cand_cost -= cp->cand->cost;
3903 ivs->cand_use_cost -= cp->cost;
3905 deps = cp->depends_on;
3907 if (deps)
3909 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3911 ivs->n_invariant_uses[iid]--;
3912 if (ivs->n_invariant_uses[iid] == 0)
3913 ivs->n_regs--;
3917 iv_ca_recount_cost (data, ivs);
3920 /* Set cost pair for USE in set IVS to CP. */
3922 static void
3923 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
3924 struct iv_use *use, struct cost_pair *cp)
3926 unsigned uid = use->id, cid, iid;
3927 bitmap deps;
3928 bitmap_iterator bi;
3930 if (ivs->cand_for_use[uid] == cp)
3931 return;
3933 if (ivs->cand_for_use[uid])
3934 iv_ca_set_no_cp (data, ivs, use);
3936 if (cp)
3938 cid = cp->cand->id;
3940 ivs->bad_uses--;
3941 ivs->cand_for_use[uid] = cp;
3942 ivs->n_cand_uses[cid]++;
3943 if (ivs->n_cand_uses[cid] == 1)
3945 bitmap_set_bit (ivs->cands, cid);
3946 /* Do not count the pseudocandidates. */
3947 if (cp->cand->iv)
3948 ivs->n_regs++;
3949 ivs->n_cands++;
3950 ivs->cand_cost += cp->cand->cost;
3953 ivs->cand_use_cost += cp->cost;
3955 deps = cp->depends_on;
3957 if (deps)
3959 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3961 ivs->n_invariant_uses[iid]++;
3962 if (ivs->n_invariant_uses[iid] == 1)
3963 ivs->n_regs++;
3967 iv_ca_recount_cost (data, ivs);
3971 /* Extend set IVS by expressing USE by some of the candidates in it
3972 if possible. */
3974 static void
3975 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
3976 struct iv_use *use)
3978 struct cost_pair *best_cp = NULL, *cp;
3979 bitmap_iterator bi;
3980 unsigned i;
3982 gcc_assert (ivs->upto >= use->id);
3984 if (ivs->upto == use->id)
3986 ivs->upto++;
3987 ivs->bad_uses++;
3990 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
3992 cp = get_use_iv_cost (data, use, iv_cand (data, i));
3994 if (cheaper_cost_pair (cp, best_cp))
3995 best_cp = cp;
3998 iv_ca_set_cp (data, ivs, use, best_cp);
4001 /* Get cost for assignment IVS. */
4003 static unsigned
4004 iv_ca_cost (struct iv_ca *ivs)
4006 return (ivs->bad_uses ? INFTY : ivs->cost);
4009 /* Returns true if all dependences of CP are among invariants in IVS. */
4011 static bool
4012 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4014 unsigned i;
4015 bitmap_iterator bi;
4017 if (!cp->depends_on)
4018 return true;
4020 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4022 if (ivs->n_invariant_uses[i] == 0)
4023 return false;
4026 return true;
4029 /* Creates a change that expresses USE by NEW_CP instead of OLD_CP, and chains
4030 it before NEXT_CHANGE. */
4032 static struct iv_ca_delta *
4033 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4034 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4036 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4038 change->use = use;
4039 change->old_cp = old_cp;
4040 change->new_cp = new_cp;
4041 change->next_change = next_change;
4043 return change;
4046 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4047 are rewritten. */
4049 static struct iv_ca_delta *
4050 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4052 struct iv_ca_delta *last;
4054 if (!l2)
4055 return l1;
4057 if (!l1)
4058 return l2;
4060 for (last = l1; last->next_change; last = last->next_change)
4061 continue;
4062 last->next_change = l2;
4064 return l1;
4067 /* Returns the cost pair by which USE is expressed in IVS. */
4069 static struct cost_pair *
4070 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4072 return ivs->cand_for_use[use->id];
4075 /* Reverse the list of changes DELTA, forming the inverse to it. */
4077 static struct iv_ca_delta *
4078 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4080 struct iv_ca_delta *act, *next, *prev = NULL;
4081 struct cost_pair *tmp;
4083 for (act = delta; act; act = next)
4085 next = act->next_change;
4086 act->next_change = prev;
4087 prev = act;
4089 tmp = act->old_cp;
4090 act->old_cp = act->new_cp;
4091 act->new_cp = tmp;
4094 return prev;
4097 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4098 reverted instead. */
4100 static void
4101 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4102 struct iv_ca_delta *delta, bool forward)
4104 struct cost_pair *from, *to;
4105 struct iv_ca_delta *act;
4107 if (!forward)
4108 delta = iv_ca_delta_reverse (delta);
4110 for (act = delta; act; act = act->next_change)
4112 from = act->old_cp;
4113 to = act->new_cp;
4114 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4115 iv_ca_set_cp (data, ivs, act->use, to);
4118 if (!forward)
4119 iv_ca_delta_reverse (delta);
4122 /* Returns true if CAND is used in IVS. */
4124 static bool
4125 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4127 return ivs->n_cand_uses[cand->id] > 0;
4130 /* Returns number of induction variable candidates in the set IVS. */
4132 static unsigned
4133 iv_ca_n_cands (struct iv_ca *ivs)
4135 return ivs->n_cands;
4138 /* Free the list of changes DELTA. */
4140 static void
4141 iv_ca_delta_free (struct iv_ca_delta **delta)
4143 struct iv_ca_delta *act, *next;
4145 for (act = *delta; act; act = next)
4147 next = act->next_change;
4148 free (act);
4151 *delta = NULL;
4154 /* Allocates a new iv candidate assignment. */
4156 static struct iv_ca *
4157 iv_ca_new (struct ivopts_data *data)
4159 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4161 nw->upto = 0;
4162 nw->bad_uses = 0;
4163 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4164 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4165 nw->cands = BITMAP_ALLOC (NULL);
4166 nw->n_cands = 0;
4167 nw->n_regs = 0;
4168 nw->cand_use_cost = 0;
4169 nw->cand_cost = 0;
4170 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4171 nw->cost = 0;
4173 return nw;
4176 /* Free memory occupied by the set IVS. */
4178 static void
4179 iv_ca_free (struct iv_ca **ivs)
4181 free ((*ivs)->cand_for_use);
4182 free ((*ivs)->n_cand_uses);
4183 BITMAP_FREE ((*ivs)->cands);
4184 free ((*ivs)->n_invariant_uses);
4185 free (*ivs);
4186 *ivs = NULL;
4189 /* Dumps IVS to FILE. */
4191 static void
4192 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4194 const char *pref = " invariants ";
4195 unsigned i;
4197 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4198 bitmap_print (file, ivs->cands, " candidates ","\n");
4200 for (i = 1; i <= data->max_inv_id; i++)
4201 if (ivs->n_invariant_uses[i])
4203 fprintf (file, "%s%d", pref, i);
4204 pref = ", ";
4206 fprintf (file, "\n");
4209 /* Try changing the candidate in IVS to CAND for each use. Return the cost of
4210 the new set, and store the differences in DELTA. The number of induction
4211 variables in the new set is stored to N_IVS. */
4213 static unsigned
4214 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4215 struct iv_cand *cand, struct iv_ca_delta **delta,
4216 unsigned *n_ivs)
4218 unsigned i, cost;
4219 struct iv_use *use;
4220 struct cost_pair *old_cp, *new_cp;
4222 *delta = NULL;
4223 for (i = 0; i < ivs->upto; i++)
4225 use = iv_use (data, i);
4226 old_cp = iv_ca_cand_for_use (ivs, use);
4228 if (old_cp
4229 && old_cp->cand == cand)
4230 continue;
4232 new_cp = get_use_iv_cost (data, use, cand);
4233 if (!new_cp)
4234 continue;
4236 if (!iv_ca_has_deps (ivs, new_cp))
4237 continue;
4239 if (!cheaper_cost_pair (new_cp, old_cp))
4240 continue;
4242 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4245 iv_ca_delta_commit (data, ivs, *delta, true);
4246 cost = iv_ca_cost (ivs);
4247 if (n_ivs)
4248 *n_ivs = iv_ca_n_cands (ivs);
4249 iv_ca_delta_commit (data, ivs, *delta, false);
4251 return cost;
4254 /* Try narrowing set IVS by removing CAND. Return the cost of
4255 the new set and store the differences in DELTA. */
4257 static unsigned
4258 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4259 struct iv_cand *cand, struct iv_ca_delta **delta)
4261 unsigned i, ci;
4262 struct iv_use *use;
4263 struct cost_pair *old_cp, *new_cp, *cp;
4264 bitmap_iterator bi;
4265 struct iv_cand *cnd;
4266 unsigned cost;
4268 *delta = NULL;
4269 for (i = 0; i < n_iv_uses (data); i++)
4271 use = iv_use (data, i);
4273 old_cp = iv_ca_cand_for_use (ivs, use);
4274 if (old_cp->cand != cand)
4275 continue;
4277 new_cp = NULL;
4279 if (data->consider_all_candidates)
4281 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4283 if (ci == cand->id)
4284 continue;
4286 cnd = iv_cand (data, ci);
4288 cp = get_use_iv_cost (data, use, cnd);
4289 if (!cp)
4290 continue;
4291 if (!iv_ca_has_deps (ivs, cp))
4292 continue;
4294 if (!cheaper_cost_pair (cp, new_cp))
4295 continue;
4297 new_cp = cp;
4300 else
4302 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4304 if (ci == cand->id)
4305 continue;
4307 cnd = iv_cand (data, ci);
4309 cp = get_use_iv_cost (data, use, cnd);
4310 if (!cp)
4311 continue;
4312 if (!iv_ca_has_deps (ivs, cp))
4313 continue;
4315 if (!cheaper_cost_pair (cp, new_cp))
4316 continue;
4318 new_cp = cp;
4322 if (!new_cp)
4324 iv_ca_delta_free (delta);
4325 return INFTY;
4328 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4331 iv_ca_delta_commit (data, ivs, *delta, true);
4332 cost = iv_ca_cost (ivs);
4333 iv_ca_delta_commit (data, ivs, *delta, false);
4335 return cost;
4338 /* Try optimizing the set of candidates IVS by removing candidates other
4339 than EXCEPT_CAND from it. Return the cost of the new set, and store the
4340 differences in DELTA. */
4342 static unsigned
4343 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4344 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4346 bitmap_iterator bi;
4347 struct iv_ca_delta *act_delta, *best_delta;
4348 unsigned i, best_cost, acost;
4349 struct iv_cand *cand;
4351 best_delta = NULL;
4352 best_cost = iv_ca_cost (ivs);
4354 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4356 cand = iv_cand (data, i);
4358 if (cand == except_cand)
4359 continue;
4361 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4363 if (acost < best_cost)
4365 best_cost = acost;
4366 iv_ca_delta_free (&best_delta);
4367 best_delta = act_delta;
4369 else
4370 iv_ca_delta_free (&act_delta);
4373 if (!best_delta)
4375 *delta = NULL;
4376 return best_cost;
4379 /* Recurse to possibly remove other unnecessary ivs. */
4380 iv_ca_delta_commit (data, ivs, best_delta, true);
4381 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4382 iv_ca_delta_commit (data, ivs, best_delta, false);
4383 *delta = iv_ca_delta_join (best_delta, *delta);
4384 return best_cost;
4387 /* Tries to extend the set IVS in the best possible way in order
4388 to express USE. */
4390 static bool
4391 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4392 struct iv_use *use)
4394 unsigned best_cost, act_cost;
4395 unsigned i;
4396 bitmap_iterator bi;
4397 struct iv_cand *cand;
4398 struct iv_ca_delta *best_delta = NULL, *act_delta;
4399 struct cost_pair *cp;
4401 iv_ca_add_use (data, ivs, use);
4402 best_cost = iv_ca_cost (ivs);
4404 cp = iv_ca_cand_for_use (ivs, use);
4405 if (cp)
4407 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4408 iv_ca_set_no_cp (data, ivs, use);
4411 /* First try important candidates. Only if that fails, try the specific ones.
4412 Rationale -- in loops with many variables the best choice is often to use
4413 just one generic biv. If we added many ivs specific to the uses here,
4414 the optimization algorithm later would be likely to get stuck in a local
4415 minimum, thus causing us to create too many ivs. Going from few ivs
4416 to more seems more likely to be successful -- starting from few
4417 ivs, replacing an expensive use by a specific iv should always be a
4418 win. */
4419 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4421 cand = iv_cand (data, i);
4423 if (iv_ca_cand_used_p (ivs, cand))
4424 continue;
4426 cp = get_use_iv_cost (data, use, cand);
4427 if (!cp)
4428 continue;
4430 iv_ca_set_cp (data, ivs, use, cp);
4431 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4432 iv_ca_set_no_cp (data, ivs, use);
4433 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4435 if (act_cost < best_cost)
4437 best_cost = act_cost;
4439 iv_ca_delta_free (&best_delta);
4440 best_delta = act_delta;
4442 else
4443 iv_ca_delta_free (&act_delta);
4446 if (best_cost == INFTY)
4448 for (i = 0; i < use->n_map_members; i++)
4450 cp = use->cost_map + i;
4451 cand = cp->cand;
4452 if (!cand)
4453 continue;
4455 /* Already tried this. */
4456 if (cand->important)
4457 continue;
4459 if (iv_ca_cand_used_p (ivs, cand))
4460 continue;
4462 act_delta = NULL;
4463 iv_ca_set_cp (data, ivs, use, cp);
4464 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4465 iv_ca_set_no_cp (data, ivs, use);
4466 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4467 cp, act_delta);
4469 if (act_cost < best_cost)
4471 best_cost = act_cost;
4473 if (best_delta)
4474 iv_ca_delta_free (&best_delta);
4475 best_delta = act_delta;
4477 else
4478 iv_ca_delta_free (&act_delta);
4482 iv_ca_delta_commit (data, ivs, best_delta, true);
4483 iv_ca_delta_free (&best_delta);
4485 return (best_cost != INFTY);
4488 /* Finds an initial assignment of candidates to uses. */
4490 static struct iv_ca *
4491 get_initial_solution (struct ivopts_data *data)
4493 struct iv_ca *ivs = iv_ca_new (data);
4494 unsigned i;
4496 for (i = 0; i < n_iv_uses (data); i++)
4497 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4499 iv_ca_free (&ivs);
4500 return NULL;
4503 return ivs;
4506 /* Tries to improve the set of induction variables IVS. */
4508 static bool
4509 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4511 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4512 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4513 struct iv_cand *cand;
4515 /* Try extending the set of induction variables by one. */
4516 for (i = 0; i < n_iv_cands (data); i++)
4518 cand = iv_cand (data, i);
4520 if (iv_ca_cand_used_p (ivs, cand))
4521 continue;
4523 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4524 if (!act_delta)
4525 continue;
4527 /* If we successfully added the candidate and the set is small enough,
4528 try optimizing it by removing other candidates. */
4529 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4531 iv_ca_delta_commit (data, ivs, act_delta, true);
4532 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4533 iv_ca_delta_commit (data, ivs, act_delta, false);
4534 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4537 if (acost < best_cost)
4539 best_cost = acost;
4540 iv_ca_delta_free (&best_delta);
4541 best_delta = act_delta;
4543 else
4544 iv_ca_delta_free (&act_delta);
4547 if (!best_delta)
4549 /* Try removing the candidates from the set instead. */
4550 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4552 /* Nothing more we can do. */
4553 if (!best_delta)
4554 return false;
4557 iv_ca_delta_commit (data, ivs, best_delta, true);
4558 gcc_assert (best_cost == iv_ca_cost (ivs));
4559 iv_ca_delta_free (&best_delta);
4560 return true;
4563 /* Attempts to find the optimal set of induction variables. We use a simple
4564 greedy heuristic -- we try to change the selected solution by at most one
4565 candidate at a time and remove the unused ivs, as long as this improves the cost. */
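/* In outline, the search below proceeds roughly as follows: start from the
initial assignment, then repeatedly either add one candidate (pruning the
set afterwards when it is small enough) or prune candidates alone, keeping
a step only when it lowers the cost, until no step improves the solution. */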
4567 static struct iv_ca *
4568 find_optimal_iv_set (struct ivopts_data *data)
4570 unsigned i;
4571 struct iv_ca *set;
4572 struct iv_use *use;
4574 /* Get the initial solution. */
4575 set = get_initial_solution (data);
4576 if (!set)
4578 if (dump_file && (dump_flags & TDF_DETAILS))
4579 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4580 return NULL;
4583 if (dump_file && (dump_flags & TDF_DETAILS))
4585 fprintf (dump_file, "Initial set of candidates:\n");
4586 iv_ca_dump (data, dump_file, set);
4589 while (try_improve_iv_set (data, set))
4591 if (dump_file && (dump_flags & TDF_DETAILS))
4593 fprintf (dump_file, "Improved to:\n");
4594 iv_ca_dump (data, dump_file, set);
4598 if (dump_file && (dump_flags & TDF_DETAILS))
4599 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
4601 for (i = 0; i < n_iv_uses (data); i++)
4603 use = iv_use (data, i);
4604 use->selected = iv_ca_cand_for_use (set, use)->cand;
4607 return set;
4610 /* Creates a new induction variable corresponding to CAND. */
4612 static void
4613 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4615 block_stmt_iterator incr_pos;
4616 tree base;
4617 bool after = false;
4619 if (!cand->iv)
4620 return;
4622 switch (cand->pos)
4624 case IP_NORMAL:
4625 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4626 break;
4628 case IP_END:
4629 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4630 after = true;
4631 break;
4633 case IP_ORIGINAL:
4634 /* Mark that the iv is preserved. */
4635 name_info (data, cand->var_before)->preserve_biv = true;
4636 name_info (data, cand->var_after)->preserve_biv = true;
4638 /* Rewrite the increment so that it uses var_before directly. */
4639 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4641 return;
4644 gimple_add_tmp_var (cand->var_before);
4645 add_referenced_tmp_var (cand->var_before);
4647 base = unshare_expr (cand->iv->base);
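/* The call below assumes the usual contract of create_iv: it builds the loop
phi node and the increment, storing the value valid before the increment in
cand->var_before and the value after it in cand->var_after. */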
4649 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
4650 &incr_pos, after, &cand->var_before, &cand->var_after);
4653 /* Creates new induction variables described in SET. */
4655 static void
4656 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4658 unsigned i;
4659 struct iv_cand *cand;
4660 bitmap_iterator bi;
4662 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4664 cand = iv_cand (data, i);
4665 create_new_iv (data, cand);
4669 /* Removes statement STMT (a real statement or a phi node). If
4670 INCLUDING_DEFINED_NAME is true, also remove the ssa name defined by the statement. */
4672 static void
4673 remove_statement (tree stmt, bool including_defined_name)
4675 if (TREE_CODE (stmt) == PHI_NODE)
4677 if (!including_defined_name)
4679 /* Prevent the ssa name defined by the statement from being removed. */
4680 SET_PHI_RESULT (stmt, NULL);
4682 remove_phi_node (stmt, NULL_TREE, bb_for_stmt (stmt));
4684 else
4686 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4688 bsi_remove (&bsi);
4692 /* Rewrites USE (definition of iv used in a nonlinear expression)
4693 using candidate CAND. */
4695 static void
4696 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4697 struct iv_use *use, struct iv_cand *cand)
4699 tree comp;
4700 tree op, stmts, tgt, ass;
4701 block_stmt_iterator bsi, pbsi;
4703 /* An important special case -- if we are asked to express the value of
4704 the original iv by itself, just exit; there is no need to
4705 introduce a new computation (which might also require casting the
4706 variable to unsigned and back). */
4707 if (cand->pos == IP_ORIGINAL
4708 && TREE_CODE (use->stmt) == MODIFY_EXPR
4709 && TREE_OPERAND (use->stmt, 0) == cand->var_after)
4711 op = TREE_OPERAND (use->stmt, 1);
4713 /* Be a bit careful. In case the variable is expressed in some
4714 complicated way, rewrite it so that we can get rid of that
4715 complicated expression. */
4716 if ((TREE_CODE (op) == PLUS_EXPR
4717 || TREE_CODE (op) == MINUS_EXPR)
4718 && TREE_OPERAND (op, 0) == cand->var_before
4719 && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
4720 return;
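/* A concrete (hypothetical) instance of the special case above: for an
original increment i_14 = i_8 + 1 with cand->var_before == i_8 and
cand->var_after == i_14, the statement is left as it is. */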
4723 comp = unshare_expr (get_computation (data->current_loop,
4724 use, cand));
4725 switch (TREE_CODE (use->stmt))
4727 case PHI_NODE:
4728 tgt = PHI_RESULT (use->stmt);
4730 /* If we should keep the biv, do not replace it. */
4731 if (name_info (data, tgt)->preserve_biv)
4732 return;
4734 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4735 while (!bsi_end_p (pbsi)
4736 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4738 bsi = pbsi;
4739 bsi_next (&pbsi);
4741 break;
4743 case MODIFY_EXPR:
4744 tgt = TREE_OPERAND (use->stmt, 0);
4745 bsi = bsi_for_stmt (use->stmt);
4746 break;
4748 default:
4749 gcc_unreachable ();
4752 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4754 if (TREE_CODE (use->stmt) == PHI_NODE)
4756 if (stmts)
4757 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4758 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
4759 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4760 remove_statement (use->stmt, false);
4761 SSA_NAME_DEF_STMT (tgt) = ass;
4763 else
4765 if (stmts)
4766 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4767 TREE_OPERAND (use->stmt, 1) = op;
4771 /* Replaces the ssa name in index IDX by its basic variable. Callback for
4772 for_each_index. */
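/* For example (hypothetical names), an index such as i_17 in a[i_17] is
replaced by its underlying variable i; the unshared copy built this way is
later recorded as REF_ORIGINAL for alias analysis. */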
4774 static bool
4775 idx_remove_ssa_names (tree base, tree *idx,
4776 void *data ATTRIBUTE_UNUSED)
4778 tree *op;
4780 if (TREE_CODE (*idx) == SSA_NAME)
4781 *idx = SSA_NAME_VAR (*idx);
4783 if (TREE_CODE (base) == ARRAY_REF)
4785 op = &TREE_OPERAND (base, 2);
4786 if (*op
4787 && TREE_CODE (*op) == SSA_NAME)
4788 *op = SSA_NAME_VAR (*op);
4789 op = &TREE_OPERAND (base, 3);
4790 if (*op
4791 && TREE_CODE (*op) == SSA_NAME)
4792 *op = SSA_NAME_VAR (*op);
4795 return true;
4798 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4800 static tree
4801 unshare_and_remove_ssa_names (tree ref)
4803 ref = unshare_expr (ref);
4804 for_each_index (&ref, idx_remove_ssa_names, NULL);
4806 return ref;
4809 /* Rewrites base of memory access OP with expression WITH in statement
4810 pointed to by BSI. */
4812 static void
4813 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
4815 tree bvar, var, new_var, new_name, copy, name;
4816 tree orig;
4818 var = bvar = get_base_address (*op);
4820 if (!var || TREE_CODE (with) != SSA_NAME)
4821 goto do_rewrite;
4823 gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
4824 gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
4825 if (TREE_CODE (var) == INDIRECT_REF)
4826 var = TREE_OPERAND (var, 0);
4827 if (TREE_CODE (var) == SSA_NAME)
4829 name = var;
4830 var = SSA_NAME_VAR (var);
4832 else if (DECL_P (var))
4833 name = NULL_TREE;
4834 else
4835 goto do_rewrite;
4837 if (var_ann (var)->type_mem_tag)
4838 var = var_ann (var)->type_mem_tag;
4840 /* We need to add a memory tag for the variable. But we do not want
4841 to add it to the temporary used for the computations, since this leads
4842 to problems in redundancy elimination when there are common parts
4843 in two computations referring to different arrays. So we copy
4844 the variable to a new temporary. */
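/* A hypothetical example of the problem: the addresses of a[i] and b[i] may
share part of their gimplified computation; if a's memory tag were attached
to that shared temporary, reusing the common part could associate the wrong
tag with the b[i] access. */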
4845 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
4846 if (name)
4847 new_name = duplicate_ssa_name (name, copy);
4848 else
4850 new_var = create_tmp_var (TREE_TYPE (with), "ruatmp");
4851 add_referenced_tmp_var (new_var);
4852 var_ann (new_var)->type_mem_tag = var;
4853 new_name = make_ssa_name (new_var, copy);
4855 TREE_OPERAND (copy, 0) = new_name;
4856 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
4857 with = new_name;
4859 do_rewrite:
4861 orig = NULL_TREE;
4862 gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
4863 gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
4865 if (TREE_CODE (*op) == INDIRECT_REF)
4866 orig = REF_ORIGINAL (*op);
4867 if (!orig)
4868 orig = unshare_and_remove_ssa_names (*op);
4870 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
4872 /* Record the original reference, for purposes of alias analysis. */
4873 REF_ORIGINAL (*op) = orig;
4876 /* Rewrites USE (address that is an iv) using candidate CAND. */
4878 static void
4879 rewrite_use_address (struct ivopts_data *data,
4880 struct iv_use *use, struct iv_cand *cand)
4882 tree comp = unshare_expr (get_computation (data->current_loop,
4883 use, cand));
4884 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4885 tree stmts;
4886 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
4888 if (stmts)
4889 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4891 rewrite_address_base (&bsi, use->op_p, op);
4894 /* Rewrites USE (a condition in which one of the arguments is an iv) using
4895 candidate CAND. */
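/* As a hypothetical example, when may_eliminate_iv succeeds an exit test such
as i_4 < 100 on the original counter may be replaced by a comparison of the
chosen candidate against a precomputed bound, e.g. p_7 != &a[100]; otherwise
the compared iv is simply re-expressed in terms of CAND below. */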
4897 static void
4898 rewrite_use_compare (struct ivopts_data *data,
4899 struct iv_use *use, struct iv_cand *cand)
4901 tree comp;
4902 tree *op_p, cond, op, stmts, bound;
4903 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4904 enum tree_code compare;
4906 if (may_eliminate_iv (data, use, cand, &compare, &bound))
4908 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
4909 tree var_type = TREE_TYPE (var);
4911 bound = fold_convert (var_type, bound);
4912 op = force_gimple_operand (unshare_expr (bound), &stmts,
4913 true, NULL_TREE);
4915 if (stmts)
4916 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4918 *use->op_p = build2 (compare, boolean_type_node, var, op);
4919 modify_stmt (use->stmt);
4920 return;
4923 /* The induction variable elimination failed; just express the original
4924 giv. */
4925 comp = unshare_expr (get_computation (data->current_loop, use, cand));
4927 cond = *use->op_p;
4928 op_p = &TREE_OPERAND (cond, 0);
4929 if (TREE_CODE (*op_p) != SSA_NAME
4930 || zero_p (get_iv (data, *op_p)->step))
4931 op_p = &TREE_OPERAND (cond, 1);
4933 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
4934 if (stmts)
4935 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4937 *op_p = op;
4940 /* Ensure that operand *OP_P may be used at the end of EXIT without
4941 violating loop closed ssa form. */
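/* Loop closed ssa requires that a name defined inside a loop is used outside
of it only through a phi node on the exit block; e.g. (hypothetical names)
a use of i_7 after the loop must go through i_9 = PHI <i_7 (exit edge)>.
If no such phi node exists yet, one is created below. */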
4943 static void
4944 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
4946 basic_block def_bb;
4947 struct loop *def_loop;
4948 tree phi, use;
4950 use = USE_FROM_PTR (op_p);
4951 if (TREE_CODE (use) != SSA_NAME)
4952 return;
4954 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
4955 if (!def_bb)
4956 return;
4958 def_loop = def_bb->loop_father;
4959 if (flow_bb_inside_loop_p (def_loop, exit->dest))
4960 return;
4962 /* Try finding a phi node that copies the value out of the loop. */
4963 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
4964 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
4965 break;
4967 if (!phi)
4969 /* Create such a phi node. */
4970 tree new_name = duplicate_ssa_name (use, NULL);
4972 phi = create_phi_node (new_name, exit->dest);
4973 SSA_NAME_DEF_STMT (new_name) = phi;
4974 add_phi_arg (phi, use, exit);
4977 SET_USE (op_p, PHI_RESULT (phi));
4980 /* Ensure that operands of STMT may be used at the end of EXIT without
4981 violating loop closed ssa form. */
4983 static void
4984 protect_loop_closed_ssa_form (edge exit, tree stmt)
4986 use_optype uses;
4987 vuse_optype vuses;
4988 v_may_def_optype v_may_defs;
4989 unsigned i;
4991 get_stmt_operands (stmt);
4993 uses = STMT_USE_OPS (stmt);
4994 for (i = 0; i < NUM_USES (uses); i++)
4995 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4997 vuses = STMT_VUSE_OPS (stmt);
4998 for (i = 0; i < NUM_VUSES (vuses); i++)
4999 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
5001 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5002 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
5003 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
5006 /* STMTS compute the value of a phi argument OP on EXIT of a loop. Arrange things
5007 so that they are emitted in the correct place, and so that the loop closed
5008 ssa form is preserved. */
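/* E.g. (hypothetical names), if the exit block contains x_3 = PHI <op (exit)>,
the code below inserts STMTS after the labels of the exit block (splitting
the exit edge first if needed), removes the phi node and replaces it with
the assignment x_3 = op. */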
5010 static void
5011 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5013 tree_stmt_iterator tsi;
5014 block_stmt_iterator bsi;
5015 tree phi, stmt, def, next;
5017 if (EDGE_COUNT (exit->dest->preds) > 1)
5018 split_loop_exit_edge (exit);
5020 if (TREE_CODE (stmts) == STATEMENT_LIST)
5022 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5023 protect_loop_closed_ssa_form (exit, tsi_stmt (tsi));
5025 else
5026 protect_loop_closed_ssa_form (exit, stmts);
5028 /* Ensure there is a label in exit->dest, so that we can
5029 insert after it. */
5030 tree_block_label (exit->dest);
5031 bsi = bsi_after_labels (exit->dest);
5032 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5034 if (!op)
5035 return;
5037 for (phi = phi_nodes (exit->dest); phi; phi = next)
5039 next = PHI_CHAIN (phi);
5041 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5043 def = PHI_RESULT (phi);
5044 remove_statement (phi, false);
5045 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5046 def, op);
5047 SSA_NAME_DEF_STMT (def) = stmt;
5048 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5053 /* Rewrites the final value of USE (that is only needed outside of the loop)
5054 using candidate CAND. */
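/* A typical (hypothetical) situation: a counter i that is only read after the
loop for its final value; instead of keeping the iv alive, the use outside
of the loop is rewritten to the value the counter has when the loop exits. */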
5056 static void
5057 rewrite_use_outer (struct ivopts_data *data,
5058 struct iv_use *use, struct iv_cand *cand)
5060 edge exit;
5061 tree value, op, stmts, tgt;
5062 tree phi;
5064 switch (TREE_CODE (use->stmt))
5066 case PHI_NODE:
5067 tgt = PHI_RESULT (use->stmt);
5068 break;
5069 case MODIFY_EXPR:
5070 tgt = TREE_OPERAND (use->stmt, 0);
5071 break;
5072 default:
5073 gcc_unreachable ();
5076 exit = single_dom_exit (data->current_loop);
5078 if (exit && !(exit->flags & EDGE_COMPLEX))
5080 if (!cand->iv)
5082 bool ok = may_replace_final_value (data, use, &value);
5083 gcc_assert (ok);
5085 else
5086 value = get_computation_at (data->current_loop,
5087 use, cand, last_stmt (exit->src));
5089 value = unshare_expr (value);
5090 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5092 /* If we will preserve the iv anyway and we would need to perform
5093 some computation to replace the final value, do nothing. */
5094 if (stmts && name_info (data, tgt)->preserve_biv)
5095 return;
5097 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5099 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5101 if (USE_FROM_PTR (use_p) == tgt)
5102 SET_USE (use_p, op);
5105 if (stmts)
5106 compute_phi_arg_on_exit (exit, stmts, op);
5108 /* Enable removal of the statement. We cannot remove it directly,
5109 since we may still need the aliasing information attached to the
5110 ssa name defined by it. */
5111 name_info (data, tgt)->iv->have_use_for = false;
5112 return;
5115 /* If the variable is going to be preserved anyway, there is nothing to
5116 do. */
5117 if (name_info (data, tgt)->preserve_biv)
5118 return;
5120 /* Otherwise we just need to compute the iv. */
5121 rewrite_use_nonlinear_expr (data, use, cand);
5124 /* Rewrites USE using candidate CAND. */
5126 static void
5127 rewrite_use (struct ivopts_data *data,
5128 struct iv_use *use, struct iv_cand *cand)
5130 switch (use->type)
5132 case USE_NONLINEAR_EXPR:
5133 rewrite_use_nonlinear_expr (data, use, cand);
5134 break;
5136 case USE_OUTER:
5137 rewrite_use_outer (data, use, cand);
5138 break;
5140 case USE_ADDRESS:
5141 rewrite_use_address (data, use, cand);
5142 break;
5144 case USE_COMPARE:
5145 rewrite_use_compare (data, use, cand);
5146 break;
5148 default:
5149 gcc_unreachable ();
5151 modify_stmt (use->stmt);
5154 /* Rewrite the uses using the selected induction variables. */
5156 static void
5157 rewrite_uses (struct ivopts_data *data)
5159 unsigned i;
5160 struct iv_cand *cand;
5161 struct iv_use *use;
5163 for (i = 0; i < n_iv_uses (data); i++)
5165 use = iv_use (data, i);
5166 cand = use->selected;
5167 gcc_assert (cand);
5169 rewrite_use (data, use, cand);
5173 /* Removes the ivs that are not used after rewriting. */
5175 static void
5176 remove_unused_ivs (struct ivopts_data *data)
5178 unsigned j;
5179 bitmap_iterator bi;
5181 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5183 struct version_info *info;
5185 info = ver_info (data, j);
5186 if (info->iv
5187 && !zero_p (info->iv->step)
5188 && !info->inv_id
5189 && !info->iv->have_use_for
5190 && !info->preserve_biv)
5191 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5195 /* Frees data allocated by the optimization of a single loop. */
5197 static void
5198 free_loop_data (struct ivopts_data *data)
5200 unsigned i, j;
5201 bitmap_iterator bi;
5203 htab_empty (data->niters);
5205 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5207 struct version_info *info;
5209 info = ver_info (data, i);
5210 if (info->iv)
5211 free (info->iv);
5212 info->iv = NULL;
5213 info->has_nonlin_use = false;
5214 info->preserve_biv = false;
5215 info->inv_id = 0;
5217 bitmap_clear (data->relevant);
5218 bitmap_clear (data->important_candidates);
5220 for (i = 0; i < n_iv_uses (data); i++)
5222 struct iv_use *use = iv_use (data, i);
5224 free (use->iv);
5225 BITMAP_FREE (use->related_cands);
5226 for (j = 0; j < use->n_map_members; j++)
5227 if (use->cost_map[j].depends_on)
5228 BITMAP_FREE (use->cost_map[j].depends_on);
5229 free (use->cost_map);
5230 free (use);
5232 VARRAY_POP_ALL (data->iv_uses);
5234 for (i = 0; i < n_iv_cands (data); i++)
5236 struct iv_cand *cand = iv_cand (data, i);
5238 if (cand->iv)
5239 free (cand->iv);
5240 free (cand);
5242 VARRAY_POP_ALL (data->iv_candidates);
5244 if (data->version_info_size < num_ssa_names)
5246 data->version_info_size = 2 * num_ssa_names;
5247 free (data->version_info);
5248 data->version_info = xcalloc (data->version_info_size,
5249 sizeof (struct version_info));
5252 data->max_inv_id = 0;
5254 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
5256 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
5258 SET_DECL_RTL (obj, NULL_RTX);
5260 VARRAY_POP_ALL (decl_rtl_to_reset);
5263 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5264 loop tree. */
5266 static void
5267 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5269 unsigned i;
5271 for (i = 1; i < loops->num; i++)
5272 if (loops->parray[i])
5274 free (loops->parray[i]->aux);
5275 loops->parray[i]->aux = NULL;
5278 free_loop_data (data);
5279 free (data->version_info);
5280 BITMAP_FREE (data->relevant);
5281 BITMAP_FREE (data->important_candidates);
5282 htab_delete (data->niters);
5284 VARRAY_FREE (decl_rtl_to_reset);
5285 VARRAY_FREE (data->iv_uses);
5286 VARRAY_FREE (data->iv_candidates);
5289 /* Optimizes the LOOP. Returns true if anything changed. */
5291 static bool
5292 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5294 bool changed = false;
5295 struct iv_ca *iv_ca;
5296 edge exit;
5298 data->current_loop = loop;
5300 if (dump_file && (dump_flags & TDF_DETAILS))
5302 fprintf (dump_file, "Processing loop %d\n", loop->num);
5304 exit = single_dom_exit (loop);
5305 if (exit)
5307 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5308 exit->src->index, exit->dest->index);
5309 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5310 fprintf (dump_file, "\n");
5313 fprintf (dump_file, "\n");
5316 /* For each ssa name, determine whether it behaves as an induction variable
5317 in some loop. */
5318 if (!find_induction_variables (data))
5319 goto finish;
5321 /* Finds interesting uses (item 1). */
5322 find_interesting_uses (data);
5323 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5324 goto finish;
5326 /* Finds candidates for the induction variables (item 2). */
5327 find_iv_candidates (data);
5329 /* Calculates the costs (item 3, part 1). */
5330 determine_use_iv_costs (data);
5331 determine_iv_costs (data);
5332 determine_set_costs (data);
5334 /* Find the optimal set of induction variables (item 3, part 2). */
5335 iv_ca = find_optimal_iv_set (data);
5336 if (!iv_ca)
5337 goto finish;
5338 changed = true;
5340 /* Create the new induction variables (item 4, part 1). */
5341 create_new_ivs (data, iv_ca);
5342 iv_ca_free (&iv_ca);
5344 /* Rewrite the uses (item 4, part 2). */
5345 rewrite_uses (data);
5347 /* Remove the ivs that are unused after rewriting. */
5348 remove_unused_ivs (data);
5350 /* We have changed the structure of induction variables; it might happen
5351 that definitions in the scev database refer to some of them that were
5352 eliminated. */
5353 scev_reset ();
5355 finish:
5356 free_loop_data (data);
5358 return changed;
5361 /* Main entry point. Optimizes induction variables in LOOPS. */
5363 void
5364 tree_ssa_iv_optimize (struct loops *loops)
5366 struct loop *loop;
5367 struct ivopts_data data;
5369 tree_ssa_iv_optimize_init (loops, &data);
5371 /* Optimize the loops starting with the innermost ones. */
5372 loop = loops->tree_root;
5373 while (loop->inner)
5374 loop = loop->inner;
5376 #ifdef ENABLE_CHECKING
5377 verify_loop_closed_ssa ();
5378 verify_stmts ();
5379 #endif
5381 /* Scan the loops, inner ones first. */
5382 while (loop != loops->tree_root)
5384 if (dump_file && (dump_flags & TDF_DETAILS))
5385 flow_loop_dump (loop, dump_file, NULL, 1);
5387 tree_ssa_iv_optimize_loop (&data, loop);
5389 if (loop->next)
5391 loop = loop->next;
5392 while (loop->inner)
5393 loop = loop->inner;
5395 else
5396 loop = loop->outer;
5399 #ifdef ENABLE_CHECKING
5400 verify_loop_closed_ssa ();
5401 verify_stmts ();
5402 #endif
5404 tree_ssa_iv_optimize_finalize (loops, &data);