gcc/tree-ssa-loop-ivopts.c
/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

   All the costs are defined in a machine-specific way, using the target
   hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables; the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
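
/* As a rough illustration of the overall effect (a sketch, not code this
   pass emits verbatim): a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   may end up strength-reduced so that the address itself is the induction
   variable, roughly

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   assuming the cost model decides that the pointer iv is cheaper than
   keeping i and recomputing &a[i] on every iteration.  */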
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
91 #include "langhooks.h"
/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5
/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  unsigned inv_id;      /* Id of an invariant.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  unsigned cost;        /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  tree stmt;            /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_ORIGINAL           /* The original biv.  */
};
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  enum iv_position pos; /* Where it is computed.  */
  tree incremented_at;  /* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* Numbers of iterations for all exits of the current loop.  */
  htab_t niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  unsigned cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  unsigned cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_generic_expr (file, use->stmt, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Checks whether there exists a number X such that X * B = A, counting modulo
   2^BITS.  */

static bool
divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
        HOST_WIDE_INT *x)
{
  unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  unsigned HOST_WIDE_INT inv, ex, val;
  unsigned i;

  a &= mask;
  b &= mask;

  /* First divide the whole equation by 2 as long as possible.  */
  while (!(a & 1) && !(b & 1))
    {
      a >>= 1;
      b >>= 1;
      bits--;
      mask >>= 1;
    }

  if (!(b & 1))
    {
      /* If b is still even, a is odd and there is no such x.  */
      return false;
    }

  /* Find the inverse of b.  We compute it as
     b^(2^(bits - 1) - 1) (mod 2^bits).  */
  inv = 1;
  ex = b;
  for (i = 0; i < bits - 1; i++)
    {
      inv = (inv * ex) & mask;
      ex = (ex * ex) & mask;
    }

  val = (a * inv) & mask;

  gcc_assert (((val * b) & mask) == a);

  if ((val >> (bits - 1)) & 1)
    val |= ~mask;

  *x = val;

  return true;
}
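
/* A worked example of the above (illustrative only): for BITS = 4, A = 3,
   B = 5, the inverse of 5 modulo 16 is 5^(2^3 - 1) = 5^7 = 13 (indeed
   5 * 13 = 65 = 4 * 16 + 1), so *X = 3 * 13 mod 16 = 7, and 7 * 5 = 35
   = 2 * 16 + 3 = A.  Had the top bit of the result been set, it would have
   been sign-extended into a negative *X.  */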
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
{
  basic_block cand_bb = bb_for_stmt (cand->incremented_at);
  basic_block stmt_bb = bb_for_stmt (stmt);
  block_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  */
  for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
    {
      if (bsi_stmt (bsi) == cand->incremented_at)
        return false;
      if (bsi_stmt (bsi) == stmt)
        return true;
    }
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
      return stmt_after_ip_original_pos (cand, stmt);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}
/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class class;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  class = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  switch (class)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Element of the table in which we cache the numbers of iterations obtained
   from exits of the loop.  */

struct nfe_cache_elt
{
  /* The edge for which the number of iterations is cached.  */
  edge exit;

  /* Number of iterations corresponding to this exit, or NULL if it cannot be
     determined.  */
  tree niter;
};

/* Hash function for nfe_cache_elt E.  */

static hashval_t
nfe_hash (const void *e)
{
  const struct nfe_cache_elt *elt = e;

  return htab_hash_pointer (elt->exit);
}

/* Equality function for nfe_cache_elt E1 and edge E2.  */

static int
nfe_eq (const void *e1, const void *e2)
{
  const struct nfe_cache_elt *elt1 = e1;

  return elt1->exit == e2;
}
/* Returns a tree describing the number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct nfe_cache_elt *nfe_desc;
  struct tree_niter_desc desc;
  PTR *slot;

  slot = htab_find_slot_with_hash (data->niters, exit,
                                   htab_hash_pointer (exit),
                                   INSERT);

  if (!*slot)
    {
      nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
      nfe_desc->exit = exit;

      /* Try to determine the number of iterations.  We must know it
         unconditionally (i.e., without possibility of # of iterations
         being zero).  Also, we cannot safely work with ssa names that
         appear in phi nodes on abnormal edges, so that we do not create
         overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
                                     exit, &desc, true)
          && zero_p (desc.may_be_zero)
          && !contains_abnormal_ssa_name_p (desc.niter))
        nfe_desc->niter = desc.niter;
      else
        nfe_desc->niter = NULL_TREE;

      *slot = nfe_desc;
    }
  else
    nfe_desc = *slot;

  return nfe_desc->niter;
}
/* Returns a tree describing the number of iterations determined from
   the single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = htab_create (10, nfe_hash, nfe_eq, free);
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj, op0, op1;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (TREE_CODE (expr) == NOP_EXPR
      || TREE_CODE (expr) == CONVERT_EXPR)
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = determine_base_object (TREE_OPERAND (expr, 0));
      op1 = determine_base_object (TREE_OPERAND (expr, 1));

      if (!op1)
        return op0;

      if (!op0)
        return (code == PLUS_EXPR
                ? op1
                : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));

      return fold_build2 (code, ptr_type_node, op0, op1);

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
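
/* For illustration (a sketch of the behavior, not an exhaustive list of
   cases): for an EXPR of the form &a[i] + 4 this returns &a converted to
   ptr_type_node, since the integer constant 4 contributes no base object
   of its own, while for an integer constant cast to a pointer type it
   returns NULL_TREE, meaning no base object could be identified.  */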
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);

  if (step && integer_zerop (step))
    step = NULL_TREE;

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds the induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;

  if (!name_info (data, var)->iv)
    {
      bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, NULL_TREE);
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (tree phi)
{
  struct loop *loop = bb_for_stmt (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, phi, name, &iv, true))
    return NULL_TREE;

  return (zero_p (iv.step) ? NULL_TREE : iv.step);
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  tree phi, step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        step = fold_convert (type, step);

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  tree phi, var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
    return false;

  lhs = GIMPLE_STMT_OPERAND (stmt, 0);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, tree stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
}
/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  block_stmt_iterator bsi;

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    find_givs_in_stmt (data, bsi_stmt (bsi));
}
/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter, TDF_SLIM);
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            tree stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  tree stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (zero_p (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (TREE_CODE (stmt) == PHI_NODE
              || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Checks whether the condition *COND_P in STMT is interesting
   and if so, records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
{
  tree *op0_p;
  tree *op1_p;
  struct iv *iv0 = NULL, *iv1 = NULL, *civ;
  struct iv const_iv;
  tree zero = integer_zero_node;

  const_iv.step = NULL_TREE;

  if (TREE_CODE (*cond_p) != SSA_NAME
      && !COMPARISON_CLASS_P (*cond_p))
    return;

  if (TREE_CODE (*cond_p) == SSA_NAME)
    {
      op0_p = cond_p;
      op1_p = &zero;
    }
  else
    {
      op0_p = &TREE_OPERAND (*cond_p, 0);
      op1_p = &TREE_OPERAND (*cond_p, 1);
    }

  if (TREE_CODE (*op0_p) == SSA_NAME)
    iv0 = get_iv (data, *op0_p);
  else
    iv0 = &const_iv;

  if (TREE_CODE (*op1_p) == SSA_NAME)
    iv1 = get_iv (data, *op1_p);
  else
    iv1 = &const_iv;

  if (/* When comparing with a non-invariant value, we may not do any useful
         induction variable elimination.  */
      (!iv0 || !iv1)
      /* Eliminating a condition based on two ivs would be nontrivial.
         ??? TODO -- it is not really important to handle this case.  */
      || (!zero_p (iv0->step) && !zero_p (iv1->step)))
    {
      find_interesting_uses_op (data, *op0_p);
      find_interesting_uses_op (data, *op1_p);
      return;
    }

  if (zero_p (iv0->step) && zero_p (iv1->step))
    {
      /* If both are invariants, this is a work for unswitching.  */
      return;
    }

  civ = XNEW (struct iv);
  *civ = zero_p (iv0->step) ? *iv1 : *iv0;
  record_use (data, cond_p, civ, stmt, USE_COMPARE);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
    return false;

  len = TREE_CODE_LENGTH (TREE_CODE (expr));
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  tree stmt;
  tree *step_p;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both the lower bound
     and the step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (!iv->step)
    return true;

  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);

  if (!*dta->step_p)
    *dta->step_p = step;
  else
    *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);

  return true;
}
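
/* An illustrative example of the above: for a reference a[i] where i is an
   iv with step 1 and the array elements are 4 bytes wide, the element size
   4 is multiplied by the (converted) index step and accumulated into
   *DTA->step_p, so the address is seen to advance by 4 bytes per iteration,
   while *idx is replaced by the initial value of i.  */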
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *data)
{
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* Returns true if memory reference REF may be unaligned.  */

static bool
may_be_unaligned_p (tree ref)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode
      && (base_align < GET_MODE_ALIGNMENT (mode)
          || bitpos % GET_MODE_ALIGNMENT (mode) != 0
          || bitpos % BITS_PER_UNIT != 0))
    return true;

  return false;
}
/* Return true if EXPR may be non-addressable.  */

static bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      return AGGREGATE_TYPE_P (TREE_TYPE (expr))
             && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = *op_p, step = NULL;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (stmt_ann (stmt)->has_volatile_ops)
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  if (may_be_nonaddressable_p (base))
    goto fail;

  if (STRICT_ALIGNMENT
      && may_be_unaligned_p (base))
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              if (step)
                step = fold_build2 (PLUS_EXPR, type, step, astep);
              else
                step = astep;
            }
        }

      if (zero_p (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step_p = &step;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || zero_p (step))
        goto fail;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == INDIRECT_REF)
            *ref = fold_indirect_ref (*ref);
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, tree stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
{
  struct iv *iv;
  tree op, lhs, rhs;
  ssa_op_iter iter;
  use_operand_p use_p;

  find_invariants_stmt (data, stmt);

  if (TREE_CODE (stmt) == COND_EXPR)
    {
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
      return;
    }

  if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
    {
      lhs = GIMPLE_STMT_OPERAND (stmt, 0);
      rhs = GIMPLE_STMT_OPERAND (stmt, 1);

      if (TREE_CODE (lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, lhs);

          if (iv && !zero_p (iv->step))
            return;
        }

      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
        {
        case tcc_comparison:
          find_interesting_uses_cond (data, stmt,
                                      &GIMPLE_STMT_OPERAND (stmt, 1));
          return;

        case tcc_reference:
          find_interesting_uses_address (data, stmt,
                                         &GIMPLE_STMT_OPERAND (stmt, 1));
          if (REFERENCE_CLASS_P (lhs))
            find_interesting_uses_address (data, stmt,
                                           &GIMPLE_STMT_OPERAND (stmt, 0));
          return;

        default: ;
        }

      if (REFERENCE_CLASS_P (lhs)
          && is_gimple_val (rhs))
        {
          find_interesting_uses_address (data, stmt,
                                         &GIMPLE_STMT_OPERAND (stmt, 0));
          find_interesting_uses_op (data, rhs);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (TREE_CODE (stmt) == PHI_NODE
      && bb_for_stmt (stmt) == data->current_loop->header)
    {
      lhs = PHI_RESULT (stmt);
      iv = get_iv (data, lhs);

      if (iv && !zero_p (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  tree phi, def;

  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
    {
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      find_interesting_uses_op (data, def);
    }
}
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  block_stmt_iterator bsi;
  tree phi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        find_interesting_uses_stmt (data, phi);
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
        find_interesting_uses_stmt (data, bsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || zero_p (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (zero_p (op1))
        expr = op0;
      else if (zero_p (op0))
        {
          if (code == PLUS_EXPR)
            expr = op1;
          else
            expr = fold_build1 (NEGATE_EXPR, type, op1);
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && zero_p (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      if (!inside_addr)
        return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
          && cst_and_fits_in_hwi (tmp))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset = off0 + int_cst_value (tmp);
          return op0;
        }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside an address, we might strip the top level component references,
     thus changing the type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
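
/* For illustration (a sketch of the behavior): applied to &a[4], where the
   elements of a are 4 bytes wide, this yields &a with *OFFSET set to 16;
   applied to i + 7 it yields i with *OFFSET set to 7.  */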
/* Returns a variant of TYPE that can be used as a base for different uses.
   We return an unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  The important field is set to IMPORTANT
   and the position to POS.  If USE is not NULL, the candidate is set as
   related to it.  If both BASE and STEP are NULL, we add a pseudocandidate
   for the replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
                 tree base, tree step, bool important, enum iv_position pos,
                 struct iv_use *use, tree incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
        {
          base = fold_convert (type, base);
          if (step)
            step = fold_convert (type, step);
        }
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
        continue;

      if (cand->incremented_at != incremented_at)
        continue;

      if (!cand->iv)
        {
          if (!base && !step)
            break;

          continue;
        }

      if (!base && !step)
        continue;

      if (!operand_equal_p (base, cand->iv->base, 0))
        continue;

      if (zero_p (cand->iv->step))
        {
          if (zero_p (step))
            break;
        }
      else
        {
          if (step && operand_equal_p (step, cand->iv->step, 0))
            break;
        }
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
        cand->iv = NULL;
      else
        cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
        {
          cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
          cand->var_after = cand->var_before;
        }
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
          && TREE_CODE (step) != INTEGER_CST)
        {
          fd_ivopts_data = data;
          walk_tree (&step, find_depends, &cand->depends_on, NULL);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is related to use %d\n",
                 cand->id, use->id);
    }

  return cand;
}
/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting the latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to the scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}
/* Adds a candidate BASE + STEP * i.  The important field is set to IMPORTANT
   and the position to POS.  If USE is not NULL, the candidate is set as
   related to it.  The candidate computation is scheduled on all available
   positions.  */

static void
add_candidate (struct ivopts_data *data,
               tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
}
/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
                                     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
                 true, NULL);
}
/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}
/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  tree phi, def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  add_candidate (data,
                 build_int_cst (TREE_TYPE (iv->base), 0),
                 iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (TREE_CODE (phi) == PHI_NODE)
    {
      /* Additionally record the possibility of leaving the original iv
         untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
                              iv->base, iv->step, true, IP_ORIGINAL, NULL,
                              SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}
/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !zero_p (iv->step))
        add_old_iv_candidates (data, iv);
    }
}
/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
                         struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such a variable important,
     since it is generic enough that possibly many uses may be based
     on it.  */
  add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
                 iv->step, true, use);

  /* Third, try removing the constant offset.  */
  base = strip_offset (iv->base, &offset);
  if (offset)
    add_candidate (data, base, iv->step, false, use);
}
/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
        continue;

      switch (use->type)
        {
        case USE_NONLINEAR_EXPR:
        case USE_COMPARE:
        case USE_ADDRESS:
          /* Just add the ivs based on the value of the iv used here.  */
          add_iv_value_candidates (data, use->iv, use);
          break;

        default:
          gcc_unreachable ();
        }
    }
}
/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
        bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
                                   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
         so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);
          BITMAP_FREE (use->related_cands);
        }
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
        bitmap_ior_into (iv_use (data, i)->related_cands,
                         data->important_candidates);
    }
}
2283 /* Finds the candidates for the induction variables. */
2285 static void
2286 find_iv_candidates (struct ivopts_data *data)
2288 /* Add commonly used ivs. */
2289 add_standard_iv_candidates (data);
2291 /* Add old induction variables. */
2292 add_old_ivs_candidates (data);
2294 /* Add induction variables derived from uses. */
2295 add_derived_ivs_candidates (data);
2297 /* Record the important candidates. */
2298 record_important_candidates (data);
2301 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2302 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2303 we allocate a simple list for every use. */
2305 static void
2306 alloc_use_cost_map (struct ivopts_data *data)
2308 unsigned i, size, s, j;
2310 for (i = 0; i < n_iv_uses (data); i++)
2312 struct iv_use *use = iv_use (data, i);
2313 bitmap_iterator bi;
2315 if (data->consider_all_candidates)
2316 size = n_iv_cands (data);
2317 else
2319 s = 0;
2320 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2322 s++;
2325 /* Round up to a power of two, so that taking the modulo by it is fast. */
2326 for (size = 1; size < s; size <<= 1)
2327 continue;
2330 use->n_map_members = size;
2331 use->cost_map = XCNEWVEC (struct cost_pair, size);
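/* [Editorial sketch, not part of the pass] Why round up to a power of
   two: with SIZE a power of two, the modulo used by the cost-map hashing
   below reduces to a bitwise AND. Illustrative numbers only:

     unsigned s = 5, size;
     for (size = 1; size < s; size <<= 1)
       continue;                          /* size == 8 */
     unsigned slot = 13 & (size - 1);     /* == 13 % 8 == 5, no division */

   set_use_iv_cost and get_use_iv_cost rely on this invariant. */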
2335 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2336 depends on invariants DEPENDS_ON and that the value used in expressing it
2337 is VALUE. */
2339 static void
2340 set_use_iv_cost (struct ivopts_data *data,
2341 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2342 bitmap depends_on, tree value)
2344 unsigned i, s;
2346 if (cost == INFTY)
2348 BITMAP_FREE (depends_on);
2349 return;
2352 if (data->consider_all_candidates)
2354 use->cost_map[cand->id].cand = cand;
2355 use->cost_map[cand->id].cost = cost;
2356 use->cost_map[cand->id].depends_on = depends_on;
2357 use->cost_map[cand->id].value = value;
2358 return;
2361 /* n_map_members is a power of two, so this computes modulo. */
2362 s = cand->id & (use->n_map_members - 1);
2363 for (i = s; i < use->n_map_members; i++)
2364 if (!use->cost_map[i].cand)
2365 goto found;
2366 for (i = 0; i < s; i++)
2367 if (!use->cost_map[i].cand)
2368 goto found;
2370 gcc_unreachable ();
2372 found:
2373 use->cost_map[i].cand = cand;
2374 use->cost_map[i].cost = cost;
2375 use->cost_map[i].depends_on = depends_on;
2376 use->cost_map[i].value = value;
2379 /* Gets cost of (USE, CANDIDATE) pair. */
2381 static struct cost_pair *
2382 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2383 struct iv_cand *cand)
2385 unsigned i, s;
2386 struct cost_pair *ret;
2388 if (!cand)
2389 return NULL;
2391 if (data->consider_all_candidates)
2393 ret = use->cost_map + cand->id;
2394 if (!ret->cand)
2395 return NULL;
2397 return ret;
2400 /* n_map_members is a power of two, so this computes modulo. */
2401 s = cand->id & (use->n_map_members - 1);
2402 for (i = s; i < use->n_map_members; i++)
2403 if (use->cost_map[i].cand == cand)
2404 return use->cost_map + i;
2406 for (i = 0; i < s; i++)
2407 if (use->cost_map[i].cand == cand)
2408 return use->cost_map + i;
2410 return NULL;
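/* [Editorial sketch, not part of the pass] Both set_use_iv_cost and
   get_use_iv_cost use the same open-addressing scheme: start at
   cand->id & (n_map_members - 1), probe linearly to the end of the
   array, then wrap to the start. With hypothetical values
   n_map_members == 4 and cand->id == 6:

     start slot:  6 & 3 == 2
     probe order: 2, 3, 0, 1

   Insertion hits gcc_unreachable only if the table is full, which
   alloc_use_cost_map prevents by sizing the table to hold all related
   candidates. */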
2413 /* Returns estimate on cost of computing SEQ. */
2415 static unsigned
2416 seq_cost (rtx seq)
2418 unsigned cost = 0;
2419 rtx set;
2421 for (; seq; seq = NEXT_INSN (seq))
2423 set = single_set (seq);
2424 if (set)
2425 cost += rtx_cost (set, SET);
2426 else
2427 cost++;
2430 return cost;
2433 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2434 static rtx
2435 produce_memory_decl_rtl (tree obj, int *regno)
2437 rtx x;
2439 gcc_assert (obj);
2440 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2442 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2443 x = gen_rtx_SYMBOL_REF (Pmode, name);
2445 else
2446 x = gen_raw_REG (Pmode, (*regno)++);
2448 return gen_rtx_MEM (DECL_MODE (obj), x);
2451 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2452 walk_tree. DATA contains the actual fake register number. */
2454 static tree
2455 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2457 tree obj = NULL_TREE;
2458 rtx x = NULL_RTX;
2459 int *regno = data;
2461 switch (TREE_CODE (*expr_p))
2463 case ADDR_EXPR:
2464 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2465 handled_component_p (*expr_p);
2466 expr_p = &TREE_OPERAND (*expr_p, 0))
2467 continue;
2468 obj = *expr_p;
2469 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2470 x = produce_memory_decl_rtl (obj, regno);
2471 break;
2473 case SSA_NAME:
2474 *ws = 0;
2475 obj = SSA_NAME_VAR (*expr_p);
2476 if (!DECL_RTL_SET_P (obj))
2477 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2478 break;
2480 case VAR_DECL:
2481 case PARM_DECL:
2482 case RESULT_DECL:
2483 *ws = 0;
2484 obj = *expr_p;
2486 if (DECL_RTL_SET_P (obj))
2487 break;
2489 if (DECL_MODE (obj) == BLKmode)
2490 x = produce_memory_decl_rtl (obj, regno);
2491 else
2492 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2494 break;
2496 default:
2497 break;
2500 if (x)
2502 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2503 SET_DECL_RTL (obj, x);
2506 return NULL_TREE;
2509 /* Determines cost of the computation of EXPR. */
2511 static unsigned
2512 computation_cost (tree expr)
2514 rtx seq, rslt;
2515 tree type = TREE_TYPE (expr);
2516 unsigned cost;
2517 /* Avoid using hard regs in ways which may be unsupported. */
2518 int regno = LAST_VIRTUAL_REGISTER + 1;
2520 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2521 start_sequence ();
2522 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2523 seq = get_insns ();
2524 end_sequence ();
2526 cost = seq_cost (seq);
2527 if (MEM_P (rslt))
2528 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2530 return cost;
2533 /* Returns variable containing the value of candidate CAND at statement AT. */
2535 static tree
2536 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2538 if (stmt_after_increment (loop, cand, stmt))
2539 return cand->var_after;
2540 else
2541 return cand->var_before;
2544 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2545 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2548 tree_int_cst_sign_bit (tree t)
2550 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2551 unsigned HOST_WIDE_INT w;
2553 if (bitno < HOST_BITS_PER_WIDE_INT)
2554 w = TREE_INT_CST_LOW (t);
2555 else
2557 w = TREE_INT_CST_HIGH (t);
2558 bitno -= HOST_BITS_PER_WIDE_INT;
2561 return (w >> bitno) & 1;
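/* [Editorial example, illustrative only] For a signed 8-bit type,
   bitno == 7 < HOST_BITS_PER_WIDE_INT, so the low word is used:

     t == -1   ->  w ends in ...11111111,  (w >> 7) & 1 == 1  (negative)
     t == 127  ->  w ends in ...01111111,  (w >> 7) & 1 == 0  (non-negative)  */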
2564 /* If we can prove that TOP = cst * BOT for some constant cst,
2565 store cst to MUL and return true. Otherwise return false.
2566 The returned value is always sign-extended, regardless of the
2567 signedness of TOP and BOT. */
2569 static bool
2570 constant_multiple_of (tree top, tree bot, double_int *mul)
2572 tree mby;
2573 enum tree_code code;
2574 double_int res, p0, p1;
2575 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2577 STRIP_NOPS (top);
2578 STRIP_NOPS (bot);
2580 if (operand_equal_p (top, bot, 0))
2582 *mul = double_int_one;
2583 return true;
2586 code = TREE_CODE (top);
2587 switch (code)
2589 case MULT_EXPR:
2590 mby = TREE_OPERAND (top, 1);
2591 if (TREE_CODE (mby) != INTEGER_CST)
2592 return false;
2594 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2595 return false;
2597 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2598 precision);
2599 return true;
2601 case PLUS_EXPR:
2602 case MINUS_EXPR:
2603 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2604 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2605 return false;
2607 if (code == MINUS_EXPR)
2608 p1 = double_int_neg (p1);
2609 *mul = double_int_sext (double_int_add (p0, p1), precision);
2610 return true;
2612 case INTEGER_CST:
2613 if (TREE_CODE (bot) != INTEGER_CST)
2614 return false;
2616 p0 = double_int_sext (tree_to_double_int (top), precision);
2617 p1 = double_int_sext (tree_to_double_int (bot), precision);
2618 if (double_int_zero_p (p1))
2619 return false;
2620 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2621 precision);
2622 return double_int_zero_p (res);
2624 default:
2625 return false;
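/* [Editorial example, illustrative only] With TOP = i * 6 - i * 2 and
   BOT = i, the MINUS_EXPR case recurses into both operands:

     i * 6 over i  ->  p0 == 6            (MULT_EXPR case)
     i * 2 over i  ->  p1 == 2, negated to -2
     *mul == sext (6 + (-2)) == 4, so TOP == 4 * BOT.

   With TOP == 12 and BOT == 4 (both INTEGER_CST), 12 / 4 == 3 with a
   zero remainder, so *mul == 3. */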
2629 /* Sets COMB to CST. */
2631 static void
2632 aff_combination_const (struct affine_tree_combination *comb, tree type,
2633 unsigned HOST_WIDE_INT cst)
2635 unsigned prec = TYPE_PRECISION (type);
2637 comb->type = type;
2638 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2640 comb->n = 0;
2641 comb->rest = NULL_TREE;
2642 comb->offset = cst & comb->mask;
2645 /* Sets COMB to single element ELT. */
2647 static void
2648 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2650 unsigned prec = TYPE_PRECISION (type);
2652 comb->type = type;
2653 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2655 comb->n = 1;
2656 comb->elts[0] = elt;
2657 comb->coefs[0] = 1;
2658 comb->rest = NULL_TREE;
2659 comb->offset = 0;
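/* [Editorial note, not part of the pass] The representation built by
   these helpers: an affine combination stands for

     offset + coefs[0] * elts[0] + ... + coefs[n-1] * elts[n-1] + rest

   with all arithmetic taken modulo 2^prec, prec being the precision of
   comb->type. The mask is written ((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1
   rather than (1 << prec) - 1 so that prec == HOST_BITS_PER_WIDE_INT
   does not require an undefined full-width shift. */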
2662 /* Scales COMB by SCALE. */
2664 static void
2665 aff_combination_scale (struct affine_tree_combination *comb,
2666 unsigned HOST_WIDE_INT scale)
2668 unsigned i, j;
2670 if (scale == 1)
2671 return;
2673 if (scale == 0)
2675 aff_combination_const (comb, comb->type, 0);
2676 return;
2679 comb->offset = (scale * comb->offset) & comb->mask;
2680 for (i = 0, j = 0; i < comb->n; i++)
2682 comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2683 comb->elts[j] = comb->elts[i];
2684 if (comb->coefs[j] != 0)
2685 j++;
2687 comb->n = j;
2689 if (comb->rest)
2691 if (comb->n < MAX_AFF_ELTS)
2693 comb->coefs[comb->n] = scale;
2694 comb->elts[comb->n] = comb->rest;
2695 comb->rest = NULL_TREE;
2696 comb->n++;
2698 else
2699 comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2700 build_int_cst_type (comb->type, scale));
2704 /* Adds ELT * SCALE to COMB. */
2706 static void
2707 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2708 unsigned HOST_WIDE_INT scale)
2710 unsigned i;
2712 if (scale == 0)
2713 return;
2715 for (i = 0; i < comb->n; i++)
2716 if (operand_equal_p (comb->elts[i], elt, 0))
2718 comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2719 if (comb->coefs[i])
2720 return;
2722 comb->n--;
2723 comb->coefs[i] = comb->coefs[comb->n];
2724 comb->elts[i] = comb->elts[comb->n];
2726 if (comb->rest)
2728 gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2729 comb->coefs[comb->n] = 1;
2730 comb->elts[comb->n] = comb->rest;
2731 comb->rest = NULL_TREE;
2732 comb->n++;
2734 return;
2736 if (comb->n < MAX_AFF_ELTS)
2738 comb->coefs[comb->n] = scale;
2739 comb->elts[comb->n] = elt;
2740 comb->n++;
2741 return;
2744 if (scale == 1)
2745 elt = fold_convert (comb->type, elt);
2746 else
2747 elt = fold_build2 (MULT_EXPR, comb->type,
2748 fold_convert (comb->type, elt),
2749 build_int_cst_type (comb->type, scale));
2751 if (comb->rest)
2752 comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2753 else
2754 comb->rest = elt;
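/* [Editorial example, illustrative only] Adding ELT == i with SCALE == 3
   to a combination that already contains i with coefficient -3 (stored
   modulo 2^prec as mask - 2) cancels the element: the coefficient
   becomes 0, the entry is removed, and if the overflow slot `rest' was
   occupied, it is pulled back into the elts array. */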
2757 /* Adds COMB2 to COMB1. */
2759 static void
2760 aff_combination_add (struct affine_tree_combination *comb1,
2761 struct affine_tree_combination *comb2)
2763 unsigned i;
2765 comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2766 for (i = 0; i < comb2->n; i++)
2767 aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2768 if (comb2->rest)
2769 aff_combination_add_elt (comb1, comb2->rest, 1);
2772 /* Convert COMB to TYPE. */
2774 static void
2775 aff_combination_convert (tree type, struct affine_tree_combination *comb)
2777 unsigned prec = TYPE_PRECISION (type);
2778 unsigned i;
2780 /* If the precision of both types is the same, it suffices to change the type
2781 of the whole combination -- the elements are allowed to have another type
2782 equivalent w.r.t. STRIP_NOPS. */
2783 if (prec == TYPE_PRECISION (comb->type))
2785 comb->type = type;
2786 return;
2789 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2790 comb->offset = comb->offset & comb->mask;
2792 /* The type of the elements can be different from comb->type only as
2793 much as what STRIP_NOPS would remove. We can just directly cast
2794 to TYPE. */
2795 for (i = 0; i < comb->n; i++)
2796 comb->elts[i] = fold_convert (type, comb->elts[i]);
2797 if (comb->rest)
2798 comb->rest = fold_convert (type, comb->rest);
2800 comb->type = type;
2803 /* Splits EXPR into an affine combination of parts. */
2805 static void
2806 tree_to_aff_combination (tree expr, tree type,
2807 struct affine_tree_combination *comb)
2809 struct affine_tree_combination tmp;
2810 enum tree_code code;
2811 tree cst, core, toffset;
2812 HOST_WIDE_INT bitpos, bitsize;
2813 enum machine_mode mode;
2814 int unsignedp, volatilep;
2816 STRIP_NOPS (expr);
2818 code = TREE_CODE (expr);
2819 switch (code)
2821 case INTEGER_CST:
2822 aff_combination_const (comb, type, int_cst_value (expr));
2823 return;
2825 case PLUS_EXPR:
2826 case MINUS_EXPR:
2827 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2828 tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2829 if (code == MINUS_EXPR)
2830 aff_combination_scale (&tmp, -1);
2831 aff_combination_add (comb, &tmp);
2832 return;
2834 case MULT_EXPR:
2835 cst = TREE_OPERAND (expr, 1);
2836 if (TREE_CODE (cst) != INTEGER_CST)
2837 break;
2838 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2839 aff_combination_scale (comb, int_cst_value (cst));
2840 return;
2842 case NEGATE_EXPR:
2843 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2844 aff_combination_scale (comb, -1);
2845 return;
2847 case ADDR_EXPR:
2848 core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2849 &toffset, &mode, &unsignedp, &volatilep,
2850 false);
2851 if (bitpos % BITS_PER_UNIT != 0)
2852 break;
2853 aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2854 core = build_fold_addr_expr (core);
2855 if (TREE_CODE (core) == ADDR_EXPR)
2856 aff_combination_add_elt (comb, core, 1);
2857 else
2859 tree_to_aff_combination (core, type, &tmp);
2860 aff_combination_add (comb, &tmp);
2862 if (toffset)
2864 tree_to_aff_combination (toffset, type, &tmp);
2865 aff_combination_add (comb, &tmp);
2867 return;
2869 default:
2870 break;
2873 aff_combination_elt (comb, type, expr);
2876 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for width of TYPE. */
2878 static tree
2879 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2880 unsigned HOST_WIDE_INT mask)
2882 enum tree_code code;
2884 scale &= mask;
2885 elt = fold_convert (type, elt);
2887 if (scale == 1)
2889 if (!expr)
2890 return elt;
2892 return fold_build2 (PLUS_EXPR, type, expr, elt);
2895 if (scale == mask)
2897 if (!expr)
2898 return fold_build1 (NEGATE_EXPR, type, elt);
2900 return fold_build2 (MINUS_EXPR, type, expr, elt);
2903 if (!expr)
2904 return fold_build2 (MULT_EXPR, type, elt,
2905 build_int_cst_type (type, scale));
2907 if ((scale | (mask >> 1)) == mask)
2909 /* Scale is negative. */
2910 code = MINUS_EXPR;
2911 scale = (-scale) & mask;
2913 else
2914 code = PLUS_EXPR;
2916 elt = fold_build2 (MULT_EXPR, type, elt,
2917 build_int_cst_type (type, scale));
2918 return fold_build2 (code, type, expr, elt);
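/* [Editorial example, illustrative only] With an 8-bit mask of 0xff, a
   scale of 0xfe satisfies (scale | (mask >> 1)) == mask, i.e. its sign
   bit is set, so it is treated as -2: the code negates it to 2 and
   emits expr - elt * 2 instead of expr + elt * 0xfe. */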
2921 /* Copies the tree elements of COMB to ensure that they are not shared. */
2923 static void
2924 unshare_aff_combination (struct affine_tree_combination *comb)
2926 unsigned i;
2928 for (i = 0; i < comb->n; i++)
2929 comb->elts[i] = unshare_expr (comb->elts[i]);
2930 if (comb->rest)
2931 comb->rest = unshare_expr (comb->rest);
2934 /* Makes tree from the affine combination COMB. */
2936 static tree
2937 aff_combination_to_tree (struct affine_tree_combination *comb)
2939 tree type = comb->type;
2940 tree expr = comb->rest;
2941 unsigned i;
2942 unsigned HOST_WIDE_INT off, sgn;
2944 if (comb->n == 0 && comb->offset == 0)
2946 if (expr)
2948 /* Handle the special case produced by get_computation_aff when
2949 the type does not fit in HOST_WIDE_INT. */
2950 return fold_convert (type, expr);
2952 else
2953 return build_int_cst (type, 0);
2956 gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2958 for (i = 0; i < comb->n; i++)
2959 expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2960 comb->mask);
2962 if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2964 /* Offset is negative. */
2965 off = (-comb->offset) & comb->mask;
2966 sgn = comb->mask;
2968 else
2970 off = comb->offset;
2971 sgn = 1;
2973 return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2974 comb->mask);
2977 /* Folds EXPR using the affine expressions framework. */
2979 static tree
2980 fold_affine_expr (tree expr)
2982 tree type = TREE_TYPE (expr);
2983 struct affine_tree_combination comb;
2985 if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
2986 return expr;
2988 tree_to_aff_combination (expr, type, &comb);
2989 return aff_combination_to_tree (&comb);
2992 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2993 same precision that is at least as wide as the precision of TYPE, stores
2994 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2995 type of A and B. */
2997 static tree
2998 determine_common_wider_type (tree *a, tree *b)
3000 tree wider_type = NULL;
3001 tree suba, subb;
3002 tree atype = TREE_TYPE (*a);
3004 if ((TREE_CODE (*a) == NOP_EXPR
3005 || TREE_CODE (*a) == CONVERT_EXPR))
3007 suba = TREE_OPERAND (*a, 0);
3008 wider_type = TREE_TYPE (suba);
3009 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3010 return atype;
3012 else
3013 return atype;
3015 if ((TREE_CODE (*b) == NOP_EXPR
3016 || TREE_CODE (*b) == CONVERT_EXPR))
3018 subb = TREE_OPERAND (*b, 0);
3019 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3020 return atype;
3022 else
3023 return atype;
3025 *a = suba;
3026 *b = subb;
3027 return wider_type;
3030 /* Determines the expression by which USE is expressed from induction variable
3031 CAND at statement AT in LOOP. The expression is stored in a decomposed
3032 form into AFF. Returns false if USE cannot be expressed using CAND. */
3034 static bool
3035 get_computation_aff (struct loop *loop,
3036 struct iv_use *use, struct iv_cand *cand, tree at,
3037 struct affine_tree_combination *aff)
3039 tree ubase = use->iv->base;
3040 tree ustep = use->iv->step;
3041 tree cbase = cand->iv->base;
3042 tree cstep = cand->iv->step;
3043 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3044 tree common_type;
3045 tree uutype;
3046 tree expr, delta;
3047 tree ratio;
3048 unsigned HOST_WIDE_INT ustepi, cstepi;
3049 HOST_WIDE_INT ratioi;
3050 struct affine_tree_combination cbase_aff, expr_aff;
3051 tree cstep_orig = cstep, ustep_orig = ustep;
3052 double_int rat;
3054 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3056 /* We do not have a precision to express the values of USE. */
3057 return false;
3060 expr = var_at_stmt (loop, cand, at);
3062 if (TREE_TYPE (expr) != ctype)
3064 /* This may happen with the original ivs. */
3065 expr = fold_convert (ctype, expr);
3068 if (TYPE_UNSIGNED (utype))
3069 uutype = utype;
3070 else
3072 uutype = unsigned_type_for (utype);
3073 ubase = fold_convert (uutype, ubase);
3074 ustep = fold_convert (uutype, ustep);
3077 if (uutype != ctype)
3079 expr = fold_convert (uutype, expr);
3080 cbase = fold_convert (uutype, cbase);
3081 cstep = fold_convert (uutype, cstep);
3083 /* If the conversion is not a no-op, we must take it into account when
3084 considering the value of the step. */
3085 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3086 cstep_orig = cstep;
3089 if (cst_and_fits_in_hwi (cstep_orig)
3090 && cst_and_fits_in_hwi (ustep_orig))
3092 ustepi = int_cst_value (ustep_orig);
3093 cstepi = int_cst_value (cstep_orig);
3095 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3097 /* TODO: maybe consider the case when USTEP divides CSTEP and the ratio is
3098 a power of 2 (so that the division is fast to execute)? We would
3099 then need to be much more careful with overflows etc. */
3100 return false;
3103 ratio = build_int_cst_type (uutype, ratioi);
3105 else
3107 if (!constant_multiple_of (ustep_orig, cstep_orig, &rat))
3108 return false;
3109 ratio = double_int_to_tree (uutype, rat);
3111 /* RATIOI is only used to detect special cases when the multiplicative
3112 factor is 1 or -1, so if RAT does not fit in a HOST_WIDE_INT, we may
3113 set it to 0. */
3114 if (double_int_fits_in_shwi_p (rat))
3115 ratioi = double_int_to_shwi (rat);
3116 else
3117 ratioi = 0;
3120 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3121 type, we achieve better folding by computing their difference in this
3122 wider type, and casting the result to UUTYPE. We do not need to worry about
3123 overflows, as all the arithmetic will in the end be performed in UUTYPE
3124 anyway. */
3125 common_type = determine_common_wider_type (&ubase, &cbase);
3127 /* We may need to shift the value if we are after the increment. */
3128 if (stmt_after_increment (loop, cand, at))
3130 if (uutype != common_type)
3131 cstep = fold_convert (common_type, cstep);
3132 cbase = fold_build2 (PLUS_EXPR, common_type, cbase, cstep);
3135 /* use = ubase - ratio * cbase + ratio * var.
3137 In the general case, ubase + ratio * (var - cbase) could be better (one less
3138 multiplication), but often it is possible to eliminate redundant parts
3139 of the computations from the (ubase - ratio * cbase) term, and if it does not
3140 happen, fold is able to apply the distributive law to obtain this form
3141 anyway. */
3143 if (TYPE_PRECISION (common_type) > HOST_BITS_PER_WIDE_INT)
3145 /* Let's compute in trees and just return the result in AFF. This case
3146 should not be very common, and fold itself is not that bad either,
3147 so making the aff. functions more complicated to handle this case
3148 is not that urgent. */
3149 if (ratioi == 1)
3151 delta = fold_build2 (MINUS_EXPR, common_type, ubase, cbase);
3152 if (uutype != common_type)
3153 delta = fold_convert (uutype, delta);
3154 expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3156 else if (ratioi == -1)
3158 delta = fold_build2 (PLUS_EXPR, common_type, ubase, cbase);
3159 if (uutype != common_type)
3160 delta = fold_convert (uutype, delta);
3161 expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3163 else
3165 delta = fold_build2 (MULT_EXPR, common_type, cbase, ratio);
3166 delta = fold_build2 (MINUS_EXPR, common_type, ubase, delta);
3167 if (uutype != common_type)
3168 delta = fold_convert (uutype, delta);
3169 expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3170 expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3173 aff->type = uutype;
3174 aff->n = 0;
3175 aff->offset = 0;
3176 aff->mask = 0;
3177 aff->rest = expr;
3178 return true;
3181 /* If we got here, the type fits in HOST_WIDE_INT, thus it must be
3182 possible to compute RATIOI. */
3183 gcc_assert (ratioi);
3185 tree_to_aff_combination (ubase, common_type, aff);
3186 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3187 tree_to_aff_combination (expr, uutype, &expr_aff);
3188 aff_combination_scale (&cbase_aff, -ratioi);
3189 aff_combination_scale (&expr_aff, ratioi);
3190 aff_combination_add (aff, &cbase_aff);
3191 if (common_type != uutype)
3192 aff_combination_convert (uutype, aff);
3193 aff_combination_add (aff, &expr_aff);
3195 return true;
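/* [Editorial example, illustrative only] For a use {base 4, step 8} and
   a candidate {base 0, step 4}, the ratio is 8 / 4 == 2, so

     use = ubase - ratio * cbase + ratio * var = 4 - 2 * 0 + 2 * var

   and the resulting combination is {offset 4, coefficient 2 for var}. */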
3198 /* Determines the expression by which USE is expressed from induction variable
3199 CAND at statement AT in LOOP. The computation is unshared. */
3201 static tree
3202 get_computation_at (struct loop *loop,
3203 struct iv_use *use, struct iv_cand *cand, tree at)
3205 struct affine_tree_combination aff;
3206 tree type = TREE_TYPE (use->iv->base);
3208 if (!get_computation_aff (loop, use, cand, at, &aff))
3209 return NULL_TREE;
3210 unshare_aff_combination (&aff);
3211 return fold_convert (type, aff_combination_to_tree (&aff));
3214 /* Determines the expression by which USE is expressed from induction variable
3215 CAND in LOOP. The computation is unshared. */
3217 static tree
3218 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3220 return get_computation_at (loop, use, cand, use->stmt);
3223 /* Returns cost of addition in MODE. */
3225 static unsigned
3226 add_cost (enum machine_mode mode)
3228 static unsigned costs[NUM_MACHINE_MODES];
3229 rtx seq;
3230 unsigned cost;
3232 if (costs[mode])
3233 return costs[mode];
3235 start_sequence ();
3236 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3237 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3238 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3239 NULL_RTX);
3240 seq = get_insns ();
3241 end_sequence ();
3243 cost = seq_cost (seq);
3244 if (!cost)
3245 cost = 1;
3247 costs[mode] = cost;
3249 if (dump_file && (dump_flags & TDF_DETAILS))
3250 fprintf (dump_file, "Addition in %s costs %d\n",
3251 GET_MODE_NAME (mode), cost);
3252 return cost;
3255 /* Entry in a hashtable of already known costs for multiplication. */
3256 struct mbc_entry
3258 HOST_WIDE_INT cst; /* The constant to multiply by. */
3259 enum machine_mode mode; /* In mode. */
3260 unsigned cost; /* The cost. */
3263 /* Computes the hash value for ENTRY. */
3265 static hashval_t
3266 mbc_entry_hash (const void *entry)
3268 const struct mbc_entry *e = entry;
3270 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3273 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3275 static int
3276 mbc_entry_eq (const void *entry1, const void *entry2)
3278 const struct mbc_entry *e1 = entry1;
3279 const struct mbc_entry *e2 = entry2;
3281 return (e1->mode == e2->mode
3282 && e1->cst == e2->cst);
3285 /* Returns cost of multiplication by constant CST in MODE. */
3287 unsigned
3288 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3290 static htab_t costs;
3291 struct mbc_entry **cached, act;
3292 rtx seq;
3293 unsigned cost;
3295 if (!costs)
3296 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3298 act.mode = mode;
3299 act.cst = cst;
3300 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3301 if (*cached)
3302 return (*cached)->cost;
3304 *cached = XNEW (struct mbc_entry);
3305 (*cached)->mode = mode;
3306 (*cached)->cst = cst;
3308 start_sequence ();
3309 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3310 gen_int_mode (cst, mode), NULL_RTX, 0);
3311 seq = get_insns ();
3312 end_sequence ();
3314 cost = seq_cost (seq);
3316 if (dump_file && (dump_flags & TDF_DETAILS))
3317 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3318 (int) cst, GET_MODE_NAME (mode), cost);
3320 (*cached)->cost = cost;
3322 return cost;
3325 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3326 validity for a memory reference accessing memory of mode MODE. */
3328 bool
3329 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
3331 #define MAX_RATIO 128
3332 static sbitmap valid_mult[MAX_MACHINE_MODE];
3334 if (!valid_mult[mode])
3336 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3337 rtx addr;
3338 HOST_WIDE_INT i;
3340 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
3341 sbitmap_zero (valid_mult[mode]);
3342 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3343 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3345 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3346 if (memory_address_p (mode, addr))
3347 SET_BIT (valid_mult[mode], i + MAX_RATIO);
3350 if (dump_file && (dump_flags & TDF_DETAILS))
3352 fprintf (dump_file, " allowed multipliers:");
3353 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3354 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
3355 fprintf (dump_file, " %d", (int) i);
3356 fprintf (dump_file, "\n");
3357 fprintf (dump_file, "\n");
3361 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3362 return false;
3364 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
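/* [Editorial example; the valid set is target-specific] On a target
   whose addressing modes allow base + index * {1, 2, 4, 8}:

     multiplier_allowed_in_address_p (4, SImode)    -> true
     multiplier_allowed_in_address_p (3, SImode)    -> false
     multiplier_allowed_in_address_p (256, SImode)  -> false (> MAX_RATIO)

   The answers are computed once per mode by probing memory_address_p
   and are cached in the valid_mult sbitmaps. */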
3367 /* Returns the cost of an address in the shape symbol + var + OFFSET + RATIO * index.
3368 If SYMBOL_PRESENT is false, the symbol is omitted. If VAR_PRESENT is false,
3369 the variable is omitted. Compute the cost for a memory reference that accesses
3370 a memory location of mode MEM_MODE.
3372 TODO -- there must be some better way. This all is quite crude. */
3374 static unsigned
3375 get_address_cost (bool symbol_present, bool var_present,
3376 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3377 enum machine_mode mem_mode)
3379 static bool initialized[MAX_MACHINE_MODE];
3380 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
3381 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
3382 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
3383 unsigned cost, acost;
3384 bool offset_p, ratio_p;
3385 HOST_WIDE_INT s_offset;
3386 unsigned HOST_WIDE_INT mask;
3387 unsigned bits;
3389 if (!initialized[mem_mode])
3391 HOST_WIDE_INT i;
3392 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3393 int old_cse_not_expected;
3394 unsigned sym_p, var_p, off_p, rat_p, add_c;
3395 rtx seq, addr, base;
3396 rtx reg0, reg1;
3398 initialized[mem_mode] = true;
3400 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3402 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3403 for (i = start; i <= 1 << 20; i <<= 1)
3405 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3406 if (!memory_address_p (mem_mode, addr))
3407 break;
3409 max_offset[mem_mode] = i == start ? 0 : i >> 1;
3410 off[mem_mode] = max_offset[mem_mode];
3412 for (i = start; i <= 1 << 20; i <<= 1)
3414 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3415 if (!memory_address_p (mem_mode, addr))
3416 break;
3418 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
3420 if (dump_file && (dump_flags & TDF_DETAILS))
3422 fprintf (dump_file, "get_address_cost:\n");
3423 fprintf (dump_file, " min offset %s %d\n",
3424 GET_MODE_NAME (mem_mode),
3425 (int) min_offset[mem_mode]);
3426 fprintf (dump_file, " max offset %s %d\n",
3427 GET_MODE_NAME (mem_mode),
3428 (int) max_offset[mem_mode]);
3431 rat[mem_mode] = 1;
3432 for (i = 2; i <= MAX_RATIO; i++)
3433 if (multiplier_allowed_in_address_p (i, mem_mode))
3435 rat[mem_mode] = i;
3436 break;
3439 /* Compute the cost of various addressing modes. */
3440 acost = 0;
3441 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3442 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3444 for (i = 0; i < 16; i++)
3446 sym_p = i & 1;
3447 var_p = (i >> 1) & 1;
3448 off_p = (i >> 2) & 1;
3449 rat_p = (i >> 3) & 1;
3451 addr = reg0;
3452 if (rat_p)
3453 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3454 gen_int_mode (rat[mem_mode], Pmode));
3456 if (var_p)
3457 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3459 if (sym_p)
3461 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3462 if (off_p)
3463 base = gen_rtx_fmt_e (CONST, Pmode,
3464 gen_rtx_fmt_ee (PLUS, Pmode,
3465 base,
3466 gen_int_mode (off[mem_mode],
3467 Pmode)));
3469 else if (off_p)
3470 base = gen_int_mode (off[mem_mode], Pmode);
3471 else
3472 base = NULL_RTX;
3474 if (base)
3475 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3477 start_sequence ();
3478 /* To avoid splitting addressing modes, pretend that no CSE will
3479 follow. */
3480 old_cse_not_expected = cse_not_expected;
3481 cse_not_expected = true;
3482 addr = memory_address (mem_mode, addr);
3483 cse_not_expected = old_cse_not_expected;
3484 seq = get_insns ();
3485 end_sequence ();
3487 acost = seq_cost (seq);
3488 acost += address_cost (addr, mem_mode);
3490 if (!acost)
3491 acost = 1;
3492 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3495 /* On some targets, it is quite expensive to load a symbol into a register,
3496 which makes addresses that contain symbols look much more expensive.
3497 However, the symbol will have to be loaded in any case before the
3498 loop (and quite likely we have it in a register already), so it does not
3499 make much sense to penalize such addresses too heavily. So make some final
3500 tweaks for the SYMBOL_PRESENT modes:
3502 If VAR_PRESENT is false, and the mode obtained by changing the symbol to a
3503 var is cheaper, use this mode with a small penalty.
3504 If VAR_PRESENT is true, try whether the mode with
3505 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
3506 if this is the case, use it. */
3507 add_c = add_cost (Pmode);
3508 for (i = 0; i < 8; i++)
3510 var_p = i & 1;
3511 off_p = (i >> 1) & 1;
3512 rat_p = (i >> 2) & 1;
3514 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3515 if (var_p)
3516 acost += add_c;
3518 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3519 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3522 if (dump_file && (dump_flags & TDF_DETAILS))
3524 fprintf (dump_file, "Address costs:\n");
3526 for (i = 0; i < 16; i++)
3528 sym_p = i & 1;
3529 var_p = (i >> 1) & 1;
3530 off_p = (i >> 2) & 1;
3531 rat_p = (i >> 3) & 1;
3533 fprintf (dump_file, " ");
3534 if (sym_p)
3535 fprintf (dump_file, "sym + ");
3536 if (var_p)
3537 fprintf (dump_file, "var + ");
3538 if (off_p)
3539 fprintf (dump_file, "cst + ");
3540 if (rat_p)
3541 fprintf (dump_file, "rat * ");
3543 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3544 fprintf (dump_file, "index costs %d\n", acost);
3546 fprintf (dump_file, "\n");
3550 bits = GET_MODE_BITSIZE (Pmode);
3551 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3552 offset &= mask;
3553 if ((offset >> (bits - 1) & 1))
3554 offset |= ~mask;
3555 s_offset = offset;
3557 cost = 0;
3558 offset_p = (s_offset != 0
3559 && min_offset[mem_mode] <= s_offset
3560 && s_offset <= max_offset[mem_mode]);
3561 ratio_p = (ratio != 1
3562 && multiplier_allowed_in_address_p (ratio, mem_mode));
3564 if (ratio != 1 && !ratio_p)
3565 cost += multiply_by_cost (ratio, Pmode);
3567 if (s_offset && !offset_p && !symbol_present)
3568 cost += add_cost (Pmode);
3570 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3571 return cost + acost;
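/* [Editorial sketch, assuming 32-bit pointers, i.e. bits == 32] The
   sign extension performed on OFFSET above:

     mask   == 0xffffffff
     offset == 0xfffffffc               (after `offset &= mask')
     (offset >> 31) & 1 == 1            (sign bit set)
     offset |= ~mask                    (s_offset == -4)

   This canonicalizes the unsigned HOST_WIDE_INT offset into a signed
   value comparable against min_offset/max_offset. */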
3574 /* Estimates cost of forcing expression EXPR into a variable. */
3576 unsigned
3577 force_expr_to_var_cost (tree expr)
3579 static bool costs_initialized = false;
3580 static unsigned integer_cost;
3581 static unsigned symbol_cost;
3582 static unsigned address_cost;
3583 tree op0, op1;
3584 unsigned cost0, cost1, cost;
3585 enum machine_mode mode;
3587 if (!costs_initialized)
3589 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3590 rtx x = gen_rtx_MEM (DECL_MODE (var),
3591 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3592 tree addr;
3593 tree type = build_pointer_type (integer_type_node);
3595 integer_cost = computation_cost (build_int_cst (integer_type_node,
3596 2000));
3598 SET_DECL_RTL (var, x);
3599 TREE_STATIC (var) = 1;
3600 addr = build1 (ADDR_EXPR, type, var);
3601 symbol_cost = computation_cost (addr) + 1;
3603 address_cost
3604 = computation_cost (build2 (PLUS_EXPR, type,
3605 addr,
3606 build_int_cst (type, 2000))) + 1;
3607 if (dump_file && (dump_flags & TDF_DETAILS))
3609 fprintf (dump_file, "force_expr_to_var_cost:\n");
3610 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3611 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3612 fprintf (dump_file, " address %d\n", (int) address_cost);
3613 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3614 fprintf (dump_file, "\n");
3617 costs_initialized = true;
3620 STRIP_NOPS (expr);
3622 if (SSA_VAR_P (expr))
3623 return 0;
3625 if (TREE_INVARIANT (expr))
3627 if (TREE_CODE (expr) == INTEGER_CST)
3628 return integer_cost;
3630 if (TREE_CODE (expr) == ADDR_EXPR)
3632 tree obj = TREE_OPERAND (expr, 0);
3634 if (TREE_CODE (obj) == VAR_DECL
3635 || TREE_CODE (obj) == PARM_DECL
3636 || TREE_CODE (obj) == RESULT_DECL)
3637 return symbol_cost;
3640 return address_cost;
3643 switch (TREE_CODE (expr))
3645 case PLUS_EXPR:
3646 case MINUS_EXPR:
3647 case MULT_EXPR:
3648 op0 = TREE_OPERAND (expr, 0);
3649 op1 = TREE_OPERAND (expr, 1);
3650 STRIP_NOPS (op0);
3651 STRIP_NOPS (op1);
3653 if (is_gimple_val (op0))
3654 cost0 = 0;
3655 else
3656 cost0 = force_expr_to_var_cost (op0);
3658 if (is_gimple_val (op1))
3659 cost1 = 0;
3660 else
3661 cost1 = force_expr_to_var_cost (op1);
3663 break;
3665 default:
3666 /* Just an arbitrary value, FIXME. */
3667 return target_spill_cost;
3670 mode = TYPE_MODE (TREE_TYPE (expr));
3671 switch (TREE_CODE (expr))
3673 case PLUS_EXPR:
3674 case MINUS_EXPR:
3675 cost = add_cost (mode);
3676 break;
3678 case MULT_EXPR:
3679 if (cst_and_fits_in_hwi (op0))
3680 cost = multiply_by_cost (int_cst_value (op0), mode);
3681 else if (cst_and_fits_in_hwi (op1))
3682 cost = multiply_by_cost (int_cst_value (op1), mode);
3683 else
3684 return target_spill_cost;
3685 break;
3687 default:
3688 gcc_unreachable ();
3691 cost += cost0;
3692 cost += cost1;
3694 /* Bound the cost by target_spill_cost. The parts of complicated
3695 computations often are either loop invariant or at least can
3696 be shared between several iv uses, so letting this grow without
3697 limits would not give reasonable results. */
3698 return cost < target_spill_cost ? cost : target_spill_cost;
3701 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3702 invariants the computation depends on. */
3704 static unsigned
3705 force_var_cost (struct ivopts_data *data,
3706 tree expr, bitmap *depends_on)
3708 if (depends_on)
3710 fd_ivopts_data = data;
3711 walk_tree (&expr, find_depends, depends_on, NULL);
3714 return force_expr_to_var_cost (expr);
3717 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3718 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3719 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3720 invariants the computation depends on. */
3722 static unsigned
3723 split_address_cost (struct ivopts_data *data,
3724 tree addr, bool *symbol_present, bool *var_present,
3725 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3727 tree core;
3728 HOST_WIDE_INT bitsize;
3729 HOST_WIDE_INT bitpos;
3730 tree toffset;
3731 enum machine_mode mode;
3732 int unsignedp, volatilep;
3734 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3735 &unsignedp, &volatilep, false);
3737 if (toffset != 0
3738 || bitpos % BITS_PER_UNIT != 0
3739 || TREE_CODE (core) != VAR_DECL)
3741 *symbol_present = false;
3742 *var_present = true;
3743 fd_ivopts_data = data;
3744 walk_tree (&addr, find_depends, depends_on, NULL);
3745 return target_spill_cost;
3748 *offset += bitpos / BITS_PER_UNIT;
3749 if (TREE_STATIC (core)
3750 || DECL_EXTERNAL (core))
3752 *symbol_present = true;
3753 *var_present = false;
3754 return 0;
3757 *symbol_present = false;
3758 *var_present = true;
3759 return 0;
3762 /* Estimates cost of expressing difference of addresses E1 - E2 as
3763 var + symbol + offset. The value of offset is added to OFFSET,
3764 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3765 part is missing. DEPENDS_ON is a set of the invariants the computation
3766 depends on. */
3768 static unsigned
3769 ptr_difference_cost (struct ivopts_data *data,
3770 tree e1, tree e2, bool *symbol_present, bool *var_present,
3771 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3773 HOST_WIDE_INT diff = 0;
3774 unsigned cost;
3776 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3778 if (ptr_difference_const (e1, e2, &diff))
3780 *offset += diff;
3781 *symbol_present = false;
3782 *var_present = false;
3783 return 0;
3786 if (e2 == integer_zero_node)
3787 return split_address_cost (data, TREE_OPERAND (e1, 0),
3788 symbol_present, var_present, offset, depends_on);
3790 *symbol_present = false;
3791 *var_present = true;
3793 cost = force_var_cost (data, e1, depends_on);
3794 cost += force_var_cost (data, e2, depends_on);
3795 cost += add_cost (Pmode);
3797 return cost;
3800 /* Estimates cost of expressing difference E1 - E2 as
3801 var + symbol + offset. The value of offset is added to OFFSET,
3802 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3803 part is missing. DEPENDS_ON is a set of the invariants the computation
3804 depends on. */
3806 static unsigned
3807 difference_cost (struct ivopts_data *data,
3808 tree e1, tree e2, bool *symbol_present, bool *var_present,
3809 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3811 unsigned cost;
3812 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3813 unsigned HOST_WIDE_INT off1, off2;
3815 e1 = strip_offset (e1, &off1);
3816 e2 = strip_offset (e2, &off2);
3817 *offset += off1 - off2;
3819 STRIP_NOPS (e1);
3820 STRIP_NOPS (e2);
3822 if (TREE_CODE (e1) == ADDR_EXPR)
3823 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3824 depends_on);
3825 *symbol_present = false;
3827 if (operand_equal_p (e1, e2, 0))
3829 *var_present = false;
3830 return 0;
3832 *var_present = true;
3833 if (zero_p (e2))
3834 return force_var_cost (data, e1, depends_on);
3836 if (zero_p (e1))
3838 cost = force_var_cost (data, e2, depends_on);
3839 cost += multiply_by_cost (-1, mode);
3841 return cost;
3844 cost = force_var_cost (data, e1, depends_on);
3845 cost += force_var_cost (data, e2, depends_on);
3846 cost += add_cost (mode);
3848 return cost;
3851 /* Determines the cost of the computation by which USE is expressed
3852 from induction variable CAND. If ADDRESS_P is true, we just need
3853 to create an address from it, otherwise we want to get it into a
3854 register. A set of invariants we depend on is stored in
3855 DEPENDS_ON. AT is the statement at which the value is computed. */
3857 static unsigned
3858 get_computation_cost_at (struct ivopts_data *data,
3859 struct iv_use *use, struct iv_cand *cand,
3860 bool address_p, bitmap *depends_on, tree at)
3862 tree ubase = use->iv->base, ustep = use->iv->step;
3863 tree cbase, cstep;
3864 tree utype = TREE_TYPE (ubase), ctype;
3865 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3866 HOST_WIDE_INT ratio, aratio;
3867 bool var_present, symbol_present;
3868 unsigned cost = 0, n_sums;
3870 *depends_on = NULL;
3872 /* Only consider real candidates. */
3873 if (!cand->iv)
3874 return INFTY;
3876 cbase = cand->iv->base;
3877 cstep = cand->iv->step;
3878 ctype = TREE_TYPE (cbase);
3880 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3882 /* We do not have a precision to express the values of USE. */
3883 return INFTY;
3886 if (address_p)
3888 /* Do not try to express the address of an object with a computation
3889 based on the address of a different object. This may cause problems
3890 in RTL-level alias analysis (which does not expect this to happen,
3891 as it is illegal in C), and would be unlikely to be useful
3892 anyway. */
3893 if (use->iv->base_object
3894 && cand->iv->base_object
3895 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3896 return INFTY;
3899 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3901 /* TODO -- add direct handling of this case. */
3902 goto fallback;
3905 /* CSTEPI is removed from the offset in case the statement is after the
3906 increment. If the step is not constant, we use zero instead.
3907 This is a bit imprecise (there is the extra addition), but
3908 redundancy elimination is likely to transform the code so that
3909 it uses the value of the variable before the increment anyway,
3910 so this is not all that unrealistic. */
3911 if (cst_and_fits_in_hwi (cstep))
3912 cstepi = int_cst_value (cstep);
3913 else
3914 cstepi = 0;
3916 if (cst_and_fits_in_hwi (ustep)
3917 && cst_and_fits_in_hwi (cstep))
3919 ustepi = int_cst_value (ustep);
3921 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3922 return INFTY;
3924 else
3926 double_int rat;
3928 if (!constant_multiple_of (ustep, cstep, &rat))
3929 return INFTY;
3931 if (double_int_fits_in_shwi_p (rat))
3932 ratio = double_int_to_shwi (rat);
3933 else
3934 return INFTY;
3937 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3938 or ratio == 1, it is better to handle this like
3940 ubase - ratio * cbase + ratio * var
3942 (this also holds in the case ratio == -1, TODO). */
3944 if (cst_and_fits_in_hwi (cbase))
3946 offset = - ratio * int_cst_value (cbase);
3947 cost += difference_cost (data,
3948 ubase, integer_zero_node,
3949 &symbol_present, &var_present, &offset,
3950 depends_on);
3952 else if (ratio == 1)
3954 cost += difference_cost (data,
3955 ubase, cbase,
3956 &symbol_present, &var_present, &offset,
3957 depends_on);
3959 else
3961 cost += force_var_cost (data, cbase, depends_on);
3962 cost += add_cost (TYPE_MODE (ctype));
3963 cost += difference_cost (data,
3964 ubase, integer_zero_node,
3965 &symbol_present, &var_present, &offset,
3966 depends_on);
3969 /* If we are after the increment, the value of the candidate is higher by
3970 one iteration. */
3971 if (stmt_after_increment (data->current_loop, cand, at))
3972 offset -= ratio * cstepi;
3974 /* Now the computation is in the shape symbol + var1 + const + ratio * var2
3975 (symbol/var/const parts may be omitted). If we are looking for an address,
3976 find the cost of addressing this. */
3977 if (address_p)
3978 return cost + get_address_cost (symbol_present, var_present, offset, ratio,
3979 TYPE_MODE (TREE_TYPE (*use->op_p)));
3981 /* Otherwise estimate the costs for computing the expression. */
3982 aratio = ratio > 0 ? ratio : -ratio;
3983 if (!symbol_present && !var_present && !offset)
3985 if (ratio != 1)
3986 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3988 return cost;
3991 if (aratio != 1)
3992 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3994 n_sums = 1;
3995 if (var_present
3996 /* Symbol + offset should be compile-time computable. */
3997 && (symbol_present || offset))
3998 n_sums++;
4000 return cost + n_sums * add_cost (TYPE_MODE (ctype));
4002 fallback:
4004 /* Just get the expression, expand it and measure the cost. */
4005 tree comp = get_computation_at (data->current_loop, use, cand, at);
4007 if (!comp)
4008 return INFTY;
4010 if (address_p)
4011 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
4013 return computation_cost (comp);
4017 /* Determines the cost of the computation by which USE is expressed
4018 from induction variable CAND. If ADDRESS_P is true, we just need
4019 to create an address from it, otherwise we want to get it into a
4020 register. A set of invariants we depend on is stored in
4021 DEPENDS_ON. */
4023 static unsigned
4024 get_computation_cost (struct ivopts_data *data,
4025 struct iv_use *use, struct iv_cand *cand,
4026 bool address_p, bitmap *depends_on)
4028 return get_computation_cost_at (data,
4029 use, cand, address_p, depends_on, use->stmt);
4032 /* Determines cost of basing replacement of USE on CAND in a generic
4033 expression. */
4035 static bool
4036 determine_use_iv_cost_generic (struct ivopts_data *data,
4037 struct iv_use *use, struct iv_cand *cand)
4039 bitmap depends_on;
4040 unsigned cost;
4042 /* The simple case first -- if we need to express the value of the preserved
4043 original biv, the cost is 0. This also prevents us from counting the
4044 cost of the increment twice -- once at this use and once in the cost of
4045 the candidate. */
4046 if (cand->pos == IP_ORIGINAL
4047 && cand->incremented_at == use->stmt)
4049 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4050 return true;
4053 cost = get_computation_cost (data, use, cand, false, &depends_on);
4054 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4056 return cost != INFTY;
4059 /* Determines cost of basing replacement of USE on CAND in an address. */
4061 static bool
4062 determine_use_iv_cost_address (struct ivopts_data *data,
4063 struct iv_use *use, struct iv_cand *cand)
4065 bitmap depends_on;
4066 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
4068 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4070 return cost != INFTY;
4073 /* Computes value of induction variable IV in iteration NITER. */
4075 static tree
4076 iv_value (struct iv *iv, tree niter)
4078 tree val;
4079 tree type = TREE_TYPE (iv->base);
4081 niter = fold_convert (type, niter);
4082 val = fold_build2 (MULT_EXPR, type, iv->step, niter);
4084 return fold_build2 (PLUS_EXPR, type, iv->base, val);
4087 /* Computes value of candidate CAND at position AT in iteration NITER. */
4089 static tree
4090 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
4092 tree val = iv_value (cand->iv, niter);
4093 tree type = TREE_TYPE (cand->iv->base);
4095 if (stmt_after_increment (loop, cand, at))
4096 val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
4098 return val;
4101 /* Returns period of induction variable iv. */
4103 static tree
4104 iv_period (struct iv *iv)
4106 tree step = iv->step, period, type;
4107 tree pow2div;
4109 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4111 /* The period of the iv is the type range divided by gcd (step, type range).
4112 Since the type range is a power of two, it suffices to determine the
4113 maximum power of two that divides the step. */
4114 pow2div = num_ending_zeros (step);
4115 type = unsigned_type_for (TREE_TYPE (step));
4117 period = build_low_bits_mask (type,
4118 (TYPE_PRECISION (type)
4119 - tree_low_cst (pow2div, 1)));
4121 return period;
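/* [Editorial example, illustrative only] For a 32-bit iv with step 4,
   num_ending_zeros gives pow2div == 2, so

     period == build_low_bits_mask (type, 32 - 2) == 2^30 - 1:

   the iv takes 2^30 distinct values before wrapping, so it can safely
   count at most 2^30 - 1 iterations. */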
4124 /* Returns the comparison operator used when eliminating the iv USE. */
4126 static enum tree_code
4127 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4129 struct loop *loop = data->current_loop;
4130 basic_block ex_bb;
4131 edge exit;
4133 ex_bb = bb_for_stmt (use->stmt);
4134 exit = EDGE_SUCC (ex_bb, 0);
4135 if (flow_bb_inside_loop_p (loop, exit->dest))
4136 exit = EDGE_SUCC (ex_bb, 1);
4138 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4141 /* Check whether it is possible to express the condition in USE by comparison
4142 of candidate CAND. If so, store the value compared with to BOUND. */
4144 static bool
4145 may_eliminate_iv (struct ivopts_data *data,
4146 struct iv_use *use, struct iv_cand *cand, tree *bound)
4148 basic_block ex_bb;
4149 edge exit;
4150 tree nit, nit_type;
4151 tree wider_type, period, per_type;
4152 struct loop *loop = data->current_loop;
4154 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4155 return false;
4157 /* For now this works only for exits that dominate the loop latch. TODO --
4158 extend it to other conditions inside the loop body. */
4159 ex_bb = bb_for_stmt (use->stmt);
4160 if (use->stmt != last_stmt (ex_bb)
4161 || TREE_CODE (use->stmt) != COND_EXPR)
4162 return false;
4163 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4164 return false;
4166 exit = EDGE_SUCC (ex_bb, 0);
4167 if (flow_bb_inside_loop_p (loop, exit->dest))
4168 exit = EDGE_SUCC (ex_bb, 1);
4169 if (flow_bb_inside_loop_p (loop, exit->dest))
4170 return false;
4172 nit = niter_for_exit (data, exit);
4173 if (!nit)
4174 return false;
4176 nit_type = TREE_TYPE (nit);
4178 /* Determine whether we may use the variable to test whether niter iterations
4179 elapsed. This is the case iff the period of the induction variable is
4180 greater than the number of iterations. */
4181 period = iv_period (cand->iv);
4182 if (!period)
4183 return false;
4184 per_type = TREE_TYPE (period);
4186 wider_type = TREE_TYPE (period);
4187 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4188 wider_type = per_type;
4189 else
4190 wider_type = nit_type;
4192 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4193 fold_convert (wider_type, period),
4194 fold_convert (wider_type, nit))))
4195 return false;
4197 *bound = fold_affine_expr (cand_value_at (loop, cand, use->stmt, nit));
4198 return true;
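/* [Editorial sketch, not part of the pass] For an exit condition such
   as `i < n' and a candidate C with a constant step, elimination
   replaces the test by comparing C against

     *bound == cand_value_at (loop, cand, use->stmt, nit),

   the value C holds once exactly NIT iterations have elapsed;
   iv_elimination_compare then picks EQ_EXPR or NE_EXPR according to
   which successor edge leaves the loop. */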
4201 /* Determines cost of basing replacement of USE on CAND in a condition. */
4203 static bool
4204 determine_use_iv_cost_condition (struct ivopts_data *data,
4205 struct iv_use *use, struct iv_cand *cand)
4207 tree bound = NULL_TREE, op, cond;
4208 bitmap depends_on = NULL;
4209 unsigned cost;
4211 /* Only consider real candidates. */
4212 if (!cand->iv)
4214 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4215 return false;
4218 if (may_eliminate_iv (data, use, cand, &bound))
4220 cost = force_var_cost (data, bound, &depends_on);
4222 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4223 return cost != INFTY;
4226 /* The induction variable elimination failed; just express the original
4227 giv. If it is compared with an invariant, note that we cannot get
4228 rid of it. */
4229 cost = get_computation_cost (data, use, cand, false, &depends_on);
4231 cond = *use->op_p;
4232 if (TREE_CODE (cond) != SSA_NAME)
4234 op = TREE_OPERAND (cond, 0);
4235 if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4236 op = TREE_OPERAND (cond, 1);
4237 if (TREE_CODE (op) == SSA_NAME)
4239 op = get_iv (data, op)->base;
4240 fd_ivopts_data = data;
4241 walk_tree (&op, find_depends, &depends_on, NULL);
4245 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4246 return cost != INFTY;
4249 /* Determines cost of basing replacement of USE on CAND. Returns false
4250 if USE cannot be based on CAND. */
4252 static bool
4253 determine_use_iv_cost (struct ivopts_data *data,
4254 struct iv_use *use, struct iv_cand *cand)
4256 switch (use->type)
4258 case USE_NONLINEAR_EXPR:
4259 return determine_use_iv_cost_generic (data, use, cand);
4261 case USE_ADDRESS:
4262 return determine_use_iv_cost_address (data, use, cand);
4264 case USE_COMPARE:
4265 return determine_use_iv_cost_condition (data, use, cand);
4267 default:
4268 gcc_unreachable ();
4272 /* Determines costs of basing the use of the iv on an iv candidate. */
4274 static void
4275 determine_use_iv_costs (struct ivopts_data *data)
4277 unsigned i, j;
4278 struct iv_use *use;
4279 struct iv_cand *cand;
4280 bitmap to_clear = BITMAP_ALLOC (NULL);
4282 alloc_use_cost_map (data);
4284 for (i = 0; i < n_iv_uses (data); i++)
4286 use = iv_use (data, i);
4288 if (data->consider_all_candidates)
4290 for (j = 0; j < n_iv_cands (data); j++)
4292 cand = iv_cand (data, j);
4293 determine_use_iv_cost (data, use, cand);
4296 else
4298 bitmap_iterator bi;
4300 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4302 cand = iv_cand (data, j);
4303 if (!determine_use_iv_cost (data, use, cand))
4304 bitmap_set_bit (to_clear, j);
4307 /* Remove the candidates for which the cost is infinite from
4308 the list of related candidates. */
4309 bitmap_and_compl_into (use->related_cands, to_clear);
4310 bitmap_clear (to_clear);
4314 BITMAP_FREE (to_clear);
4316 if (dump_file && (dump_flags & TDF_DETAILS))
4318 fprintf (dump_file, "Use-candidate costs:\n");
4320 for (i = 0; i < n_iv_uses (data); i++)
4322 use = iv_use (data, i);
4324 fprintf (dump_file, "Use %d:\n", i);
4325 fprintf (dump_file, " cand\tcost\tdepends on\n");
4326 for (j = 0; j < use->n_map_members; j++)
4328 if (!use->cost_map[j].cand
4329 || use->cost_map[j].cost == INFTY)
4330 continue;
4332 fprintf (dump_file, " %d\t%d\t",
4333 use->cost_map[j].cand->id,
4334 use->cost_map[j].cost);
4335 if (use->cost_map[j].depends_on)
4336 bitmap_print (dump_file,
4337 use->cost_map[j].depends_on, "","");
4338 fprintf (dump_file, "\n");
4341 fprintf (dump_file, "\n");
4343 fprintf (dump_file, "\n");
4347 /* Determines cost of the candidate CAND. */
4349 static void
4350 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4352 unsigned cost_base, cost_step;
4353 tree base;
4355 if (!cand->iv)
4357 cand->cost = 0;
4358 return;
4361 /* There are two costs associated with the candidate -- its increment
4362 and its initialization. The second is almost negligible for any loop
4363 that rolls enough, so we take it into account only very little. */
4365 base = cand->iv->base;
4366 cost_base = force_var_cost (data, base, NULL);
4367 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4369 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4371 /* Prefer the original iv unless we may gain something by replacing it;
4372 this is not really relevant for artificial ivs created by other
4373 passes. */
4374 if (cand->pos == IP_ORIGINAL
4375 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4376 cand->cost--;
4378 /* Prefer not to insert statements into the latch unless there are some
4379 already (so that we do not create unnecessary jumps). */
4380 if (cand->pos == IP_END
4381 && empty_block_p (ip_end_pos (data->current_loop)))
4382 cand->cost++;
4385 /* Determines costs of computation of the candidates. */
4387 static void
4388 determine_iv_costs (struct ivopts_data *data)
4390 unsigned i;
4392 if (dump_file && (dump_flags & TDF_DETAILS))
4394 fprintf (dump_file, "Candidate costs:\n");
4395 fprintf (dump_file, " cand\tcost\n");
4398 for (i = 0; i < n_iv_cands (data); i++)
4400 struct iv_cand *cand = iv_cand (data, i);
4402 determine_iv_cost (data, cand);
4404 if (dump_file && (dump_flags & TDF_DETAILS))
4405 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4408 if (dump_file && (dump_flags & TDF_DETAILS))
4409 fprintf (dump_file, "\n");
4412 /* Calculates cost for having SIZE induction variables. */
4414 static unsigned
4415 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4417 return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
4420 /* For each size of the induction variable set determine the penalty. */
4422 static void
4423 determine_set_costs (struct ivopts_data *data)
4425 unsigned j, n;
4426 tree phi, op;
4427 struct loop *loop = data->current_loop;
4428 bitmap_iterator bi;
4430 /* We use the following model (definitely improvable, especially the
4431 cost function -- TODO):
4433 We estimate the number of registers available (using MD data), name it A.
4435 We estimate the number of registers used by the loop, name it U. This
4436 number is obtained as the number of loop phi nodes (not counting virtual
4437 registers and bivs) + the number of variables from outside of the loop.
4439 We set a reserve R (free regs that are used for temporary computations,
4440 etc.). For now the reserve is a constant 3.
4442 Let I be the number of induction variables.
4444 -- if U + I + R <= A, the cost is I * SMALL_COST (just so that we do not
4445 encourage creating a lot of ivs without a reason).
4446 -- if A - R < U + I <= A, the cost is I * PRES_COST
4447 -- if U + I > A, the cost is I * PRES_COST and
4448 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. An illustrative sketch of this model follows the function below. */
4450 if (dump_file && (dump_flags & TDF_DETAILS))
4452 fprintf (dump_file, "Global costs:\n");
4453 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4454 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
4455 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
4456 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
4459 n = 0;
4460 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4462 op = PHI_RESULT (phi);
4464 if (!is_gimple_reg (op))
4465 continue;
4467 if (get_iv (data, op))
4468 continue;
4470 n++;
4473 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4475 struct version_info *info = ver_info (data, j);
4477 if (info->inv_id && info->has_nonlin_use)
4478 n++;
4481 data->regs_used = n;
4482 if (dump_file && (dump_flags & TDF_DETAILS))
4483 fprintf (dump_file, " regs_used %d\n", n);
4485 if (dump_file && (dump_flags & TDF_DETAILS))
4487 fprintf (dump_file, " cost for size:\n");
4488 fprintf (dump_file, " ivs\tcost\n");
4489 for (j = 0; j <= 2 * target_avail_regs; j++)
4490 fprintf (dump_file, " %d\t%d\n", j,
4491 ivopts_global_cost_for_size (data, j));
4492 fprintf (dump_file, "\n");
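
/* Illustrative sketch (not part of the pass) of the register-pressure
   model described in determine_set_costs above.  The real cost function
   is global_cost_for_size; the function name here is hypothetical, and
   the parameters stand in for target_avail_regs, target_small_cost,
   target_pres_cost and target_spill_cost.  */

static unsigned
example_global_cost (unsigned n_ivs, unsigned regs_used, unsigned n_uses,
                     unsigned avail_regs, unsigned small_cost,
                     unsigned pres_cost, unsigned spill_cost)
{
  unsigned reserve = 3;                    /* The constant reserve R.  */
  unsigned pressure = regs_used + n_ivs;   /* U + I.  */

  /* U + I + R <= A: everything fits comfortably.  */
  if (pressure + reserve <= avail_regs)
    return n_ivs * small_cost;

  /* A - R < U + I <= A: fits, but eats into the reserve.  */
  if (pressure <= avail_regs)
    return n_ivs * pres_cost;

  /* U + I > A: some values are expected to be spilled; charge a
     proportional fraction of the uses with the spill cost.  */
  return n_ivs * pres_cost
         + n_uses * spill_cost * (pressure - avail_regs) / pressure;
}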
4496 /* Returns true if A is a cheaper cost pair than B. */
4498 static bool
4499 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4501 if (!a)
4502 return false;
4504 if (!b)
4505 return true;
4507 if (a->cost < b->cost)
4508 return true;
4510 if (a->cost > b->cost)
4511 return false;
4513 /* In case the costs are the same, prefer the cheaper candidate. */
4514 if (a->cand->cost < b->cand->cost)
4515 return true;
4517 return false;
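
/* Illustrative note (not part of the pass): a missing cost pair acts as
   an infinite cost, so cheaper_cost_pair (cp, NULL) is true for any CP
   while cheaper_cost_pair (NULL, cp) is false; and when the use costs
   tie, the pair whose candidate is itself cheaper to maintain wins.  */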
4520 /* Computes the cost field of the IVS structure. */
4522 static void
4523 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4525 unsigned cost = 0;
4527 cost += ivs->cand_use_cost;
4528 cost += ivs->cand_cost;
4529 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4531 ivs->cost = cost;
4534 /* Remove the invariants in set INVS from set IVS. */
4536 static void
4537 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4539 bitmap_iterator bi;
4540 unsigned iid;
4542 if (!invs)
4543 return;
4545 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4547 ivs->n_invariant_uses[iid]--;
4548 if (ivs->n_invariant_uses[iid] == 0)
4549 ivs->n_regs--;
4553 /* Set USE not to be expressed by any candidate in IVS. */
4555 static void
4556 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4557 struct iv_use *use)
4559 unsigned uid = use->id, cid;
4560 struct cost_pair *cp;
4562 cp = ivs->cand_for_use[uid];
4563 if (!cp)
4564 return;
4565 cid = cp->cand->id;
4567 ivs->bad_uses++;
4568 ivs->cand_for_use[uid] = NULL;
4569 ivs->n_cand_uses[cid]--;
4571 if (ivs->n_cand_uses[cid] == 0)
4573 bitmap_clear_bit (ivs->cands, cid);
4574 /* Do not count the pseudocandidates. */
4575 if (cp->cand->iv)
4576 ivs->n_regs--;
4577 ivs->n_cands--;
4578 ivs->cand_cost -= cp->cand->cost;
4580 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4583 ivs->cand_use_cost -= cp->cost;
4585 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4586 iv_ca_recount_cost (data, ivs);
4589 /* Add invariants in set INVS to set IVS. */
4591 static void
4592 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4594 bitmap_iterator bi;
4595 unsigned iid;
4597 if (!invs)
4598 return;
4600 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4602 ivs->n_invariant_uses[iid]++;
4603 if (ivs->n_invariant_uses[iid] == 1)
4604 ivs->n_regs++;
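
/* Illustrative note (not part of the pass): iv_ca_set_remove_invariants
   and iv_ca_set_add_invariants reference-count the invariants so that
   n_regs counts each invariant only once, however many chosen cost
   pairs depend on it.  Assuming a hypothetical invariant with id 5 and
   n_invariant_uses[5] == 0 initially:

     add {5}     -> n_invariant_uses[5] == 1, n_regs incremented
     add {5}     -> n_invariant_uses[5] == 2, n_regs unchanged
     remove {5}  -> n_invariant_uses[5] == 1, n_regs unchanged
     remove {5}  -> n_invariant_uses[5] == 0, n_regs decremented  */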
4608 /* Set cost pair for USE in set IVS to CP. */
4610 static void
4611 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4612 struct iv_use *use, struct cost_pair *cp)
4614 unsigned uid = use->id, cid;
4616 if (ivs->cand_for_use[uid] == cp)
4617 return;
4619 if (ivs->cand_for_use[uid])
4620 iv_ca_set_no_cp (data, ivs, use);
4622 if (cp)
4624 cid = cp->cand->id;
4626 ivs->bad_uses--;
4627 ivs->cand_for_use[uid] = cp;
4628 ivs->n_cand_uses[cid]++;
4629 if (ivs->n_cand_uses[cid] == 1)
4631 bitmap_set_bit (ivs->cands, cid);
4632 /* Do not count the pseudocandidates. */
4633 if (cp->cand->iv)
4634 ivs->n_regs++;
4635 ivs->n_cands++;
4636 ivs->cand_cost += cp->cand->cost;
4638 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4641 ivs->cand_use_cost += cp->cost;
4642 iv_ca_set_add_invariants (ivs, cp->depends_on);
4643 iv_ca_recount_cost (data, ivs);
4647 /* Extend set IVS by expressing USE by the cheapest of the candidates
4648 already in it, if possible. */
4650 static void
4651 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4652 struct iv_use *use)
4654 struct cost_pair *best_cp = NULL, *cp;
4655 bitmap_iterator bi;
4656 unsigned i;
4658 gcc_assert (ivs->upto >= use->id);
4660 if (ivs->upto == use->id)
4662 ivs->upto++;
4663 ivs->bad_uses++;
4666 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4668 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4670 if (cheaper_cost_pair (cp, best_cp))
4671 best_cp = cp;
4674 iv_ca_set_cp (data, ivs, use, best_cp);
4677 /* Returns the cost of the assignment IVS, infinite if some use is not yet expressed by any candidate. */
4679 static unsigned
4680 iv_ca_cost (struct iv_ca *ivs)
4682 return (ivs->bad_uses ? INFTY : ivs->cost);
4685 /* Returns true if all dependences of CP are among invariants in IVS. */
4687 static bool
4688 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4690 unsigned i;
4691 bitmap_iterator bi;
4693 if (!cp->depends_on)
4694 return true;
4696 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4698 if (ivs->n_invariant_uses[i] == 0)
4699 return false;
4702 return true;
4705 /* Creates a change record expressing USE by NEW_CP instead of OLD_CP, and
4706 chains it before NEXT_CHANGE. */
4708 static struct iv_ca_delta *
4709 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4710 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4712 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4714 change->use = use;
4715 change->old_cp = old_cp;
4716 change->new_cp = new_cp;
4717 change->next_change = next_change;
4719 return change;
4722 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4723 are rewritten. */
4725 static struct iv_ca_delta *
4726 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4728 struct iv_ca_delta *last;
4730 if (!l2)
4731 return l1;
4733 if (!l1)
4734 return l2;
4736 for (last = l1; last->next_change; last = last->next_change)
4737 continue;
4738 last->next_change = l2;
4740 return l1;
4743 /* Returns the cost pair by which USE is expressed in IVS. */
4745 static struct cost_pair *
4746 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4748 return ivs->cand_for_use[use->id];
4751 /* Reverses the list of changes DELTA and swaps each old and new cost pair, forming the inverse of DELTA. */
4753 static struct iv_ca_delta *
4754 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4756 struct iv_ca_delta *act, *next, *prev = NULL;
4757 struct cost_pair *tmp;
4759 for (act = delta; act; act = next)
4761 next = act->next_change;
4762 act->next_change = prev;
4763 prev = act;
4765 tmp = act->old_cp;
4766 act->old_cp = act->new_cp;
4767 act->new_cp = tmp;
4770 return prev;
4773 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4774 reverted instead. */
4776 static void
4777 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4778 struct iv_ca_delta *delta, bool forward)
4780 struct cost_pair *from, *to;
4781 struct iv_ca_delta *act;
4783 if (!forward)
4784 delta = iv_ca_delta_reverse (delta);
4786 for (act = delta; act; act = act->next_change)
4788 from = act->old_cp;
4789 to = act->new_cp;
4790 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4791 iv_ca_set_cp (data, ivs, act->use, to);
4794 if (!forward)
4795 iv_ca_delta_reverse (delta);
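
/* Illustrative sketch (not part of the pass; the function name is
   hypothetical): the usual trial pattern built on iv_ca_delta_commit,
   as used by iv_ca_extend and iv_ca_narrow below -- apply the delta,
   measure, then revert, leaving IVS untouched unless the caller decides
   to keep the change.  */

static unsigned
example_trial_cost (struct ivopts_data *data, struct iv_ca *ivs,
                    struct iv_ca_delta *delta)
{
  unsigned cost;

  iv_ca_delta_commit (data, ivs, delta, true);   /* Apply tentatively.  */
  cost = iv_ca_cost (ivs);                       /* Measure the result.  */
  iv_ca_delta_commit (data, ivs, delta, false);  /* Revert.  */

  return cost;
}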
4798 /* Returns true if CAND is used in IVS. */
4800 static bool
4801 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4803 return ivs->n_cand_uses[cand->id] > 0;
4806 /* Returns number of induction variable candidates in the set IVS. */
4808 static unsigned
4809 iv_ca_n_cands (struct iv_ca *ivs)
4811 return ivs->n_cands;
4814 /* Free the list of changes DELTA. */
4816 static void
4817 iv_ca_delta_free (struct iv_ca_delta **delta)
4819 struct iv_ca_delta *act, *next;
4821 for (act = *delta; act; act = next)
4823 next = act->next_change;
4824 free (act);
4827 *delta = NULL;
4830 /* Allocates a new, empty assignment of iv candidates to uses. */
4832 static struct iv_ca *
4833 iv_ca_new (struct ivopts_data *data)
4835 struct iv_ca *nw = XNEW (struct iv_ca);
4837 nw->upto = 0;
4838 nw->bad_uses = 0;
4839 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4840 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4841 nw->cands = BITMAP_ALLOC (NULL);
4842 nw->n_cands = 0;
4843 nw->n_regs = 0;
4844 nw->cand_use_cost = 0;
4845 nw->cand_cost = 0;
4846 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4847 nw->cost = 0;
4849 return nw;
4852 /* Free memory occupied by the set IVS. */
4854 static void
4855 iv_ca_free (struct iv_ca **ivs)
4857 free ((*ivs)->cand_for_use);
4858 free ((*ivs)->n_cand_uses);
4859 BITMAP_FREE ((*ivs)->cands);
4860 free ((*ivs)->n_invariant_uses);
4861 free (*ivs);
4862 *ivs = NULL;
4865 /* Dumps IVS to FILE. */
4867 static void
4868 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4870 const char *pref = " invariants ";
4871 unsigned i;
4873 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4874 bitmap_print (file, ivs->cands, " candidates ","\n");
4876 for (i = 1; i <= data->max_inv_id; i++)
4877 if (ivs->n_invariant_uses[i])
4879 fprintf (file, "%s%d", pref, i);
4880 pref = ", ";
4882 fprintf (file, "\n");
4885 /* Try expressing each use in IVS by the candidate CAND where that is
4886 cheaper. Return the cost of the new set, and store the differences in
4887 DELTA. The number of induction variables in the new set is stored to N_IVS. */
4889 static unsigned
4890 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4891 struct iv_cand *cand, struct iv_ca_delta **delta,
4892 unsigned *n_ivs)
4894 unsigned i, cost;
4895 struct iv_use *use;
4896 struct cost_pair *old_cp, *new_cp;
4898 *delta = NULL;
4899 for (i = 0; i < ivs->upto; i++)
4901 use = iv_use (data, i);
4902 old_cp = iv_ca_cand_for_use (ivs, use);
4904 if (old_cp
4905 && old_cp->cand == cand)
4906 continue;
4908 new_cp = get_use_iv_cost (data, use, cand);
4909 if (!new_cp)
4910 continue;
4912 if (!iv_ca_has_deps (ivs, new_cp))
4913 continue;
4915 if (!cheaper_cost_pair (new_cp, old_cp))
4916 continue;
4918 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4921 iv_ca_delta_commit (data, ivs, *delta, true);
4922 cost = iv_ca_cost (ivs);
4923 if (n_ivs)
4924 *n_ivs = iv_ca_n_cands (ivs);
4925 iv_ca_delta_commit (data, ivs, *delta, false);
4927 return cost;
4930 /* Try narrowing set IVS by removing CAND. Return the cost of
4931 the new set and store the differences in DELTA. */
4933 static unsigned
4934 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4935 struct iv_cand *cand, struct iv_ca_delta **delta)
4937 unsigned i, ci;
4938 struct iv_use *use;
4939 struct cost_pair *old_cp, *new_cp, *cp;
4940 bitmap_iterator bi;
4941 struct iv_cand *cnd;
4942 unsigned cost;
4944 *delta = NULL;
4945 for (i = 0; i < n_iv_uses (data); i++)
4947 use = iv_use (data, i);
4949 old_cp = iv_ca_cand_for_use (ivs, use);
4950 if (old_cp->cand != cand)
4951 continue;
4953 new_cp = NULL;
4955 if (data->consider_all_candidates)
4957 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4959 if (ci == cand->id)
4960 continue;
4962 cnd = iv_cand (data, ci);
4964 cp = get_use_iv_cost (data, use, cnd);
4965 if (!cp)
4966 continue;
4967 if (!iv_ca_has_deps (ivs, cp))
4968 continue;
4970 if (!cheaper_cost_pair (cp, new_cp))
4971 continue;
4973 new_cp = cp;
4976 else
4978 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4980 if (ci == cand->id)
4981 continue;
4983 cnd = iv_cand (data, ci);
4985 cp = get_use_iv_cost (data, use, cnd);
4986 if (!cp)
4987 continue;
4988 if (!iv_ca_has_deps (ivs, cp))
4989 continue;
4991 if (!cheaper_cost_pair (cp, new_cp))
4992 continue;
4994 new_cp = cp;
4998 if (!new_cp)
5000 iv_ca_delta_free (delta);
5001 return INFTY;
5004 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5007 iv_ca_delta_commit (data, ivs, *delta, true);
5008 cost = iv_ca_cost (ivs);
5009 iv_ca_delta_commit (data, ivs, *delta, false);
5011 return cost;
5014 /* Try optimizing the set of candidates IVS by removing candidates other
5015 than EXCEPT_CAND from it. Return cost of the new set, and store
5016 differences in DELTA. */
5018 static unsigned
5019 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5020 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5022 bitmap_iterator bi;
5023 struct iv_ca_delta *act_delta, *best_delta;
5024 unsigned i, best_cost, acost;
5025 struct iv_cand *cand;
5027 best_delta = NULL;
5028 best_cost = iv_ca_cost (ivs);
5030 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5032 cand = iv_cand (data, i);
5034 if (cand == except_cand)
5035 continue;
5037 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5039 if (acost < best_cost)
5041 best_cost = acost;
5042 iv_ca_delta_free (&best_delta);
5043 best_delta = act_delta;
5045 else
5046 iv_ca_delta_free (&act_delta);
5049 if (!best_delta)
5051 *delta = NULL;
5052 return best_cost;
5055 /* Recurse to possibly remove other unnecessary ivs. */
5056 iv_ca_delta_commit (data, ivs, best_delta, true);
5057 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5058 iv_ca_delta_commit (data, ivs, best_delta, false);
5059 *delta = iv_ca_delta_join (best_delta, *delta);
5060 return best_cost;
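
/* Note on iv_ca_prune: every recursive call is made after committing a
   delta that removes at least one candidate from IVS, so the recursion
   depth is bounded by the number of candidates in the set and the
   pruning always terminates.  */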
5063 /* Tries to extend the set IVS in the best possible way in order
5064 to express USE. */
5066 static bool
5067 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5068 struct iv_use *use)
5070 unsigned best_cost, act_cost;
5071 unsigned i;
5072 bitmap_iterator bi;
5073 struct iv_cand *cand;
5074 struct iv_ca_delta *best_delta = NULL, *act_delta;
5075 struct cost_pair *cp;
5077 iv_ca_add_use (data, ivs, use);
5078 best_cost = iv_ca_cost (ivs);
5080 cp = iv_ca_cand_for_use (ivs, use);
5081 if (cp)
5083 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5084 iv_ca_set_no_cp (data, ivs, use);
5087 /* First try the important candidates; only if that fails, try the
5088 use-specific ones. Rationale -- in loops with many variables the best
5089 choice often is to use just one generic biv. If we added many ivs
5090 specific to the uses here, the optimization algorithm later would be
5091 likely to get stuck in a local minimum, thus causing us to create too
5092 many ivs. Going from few ivs to more seems more likely to be
5093 successful -- starting from few ivs, replacing an expensive use by a
5094 specific iv should always be a win. */
5095 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5097 cand = iv_cand (data, i);
5099 if (iv_ca_cand_used_p (ivs, cand))
5100 continue;
5102 cp = get_use_iv_cost (data, use, cand);
5103 if (!cp)
5104 continue;
5106 iv_ca_set_cp (data, ivs, use, cp);
5107 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5108 iv_ca_set_no_cp (data, ivs, use);
5109 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5111 if (act_cost < best_cost)
5113 best_cost = act_cost;
5115 iv_ca_delta_free (&best_delta);
5116 best_delta = act_delta;
5118 else
5119 iv_ca_delta_free (&act_delta);
5122 if (best_cost == INFTY)
5124 for (i = 0; i < use->n_map_members; i++)
5126 cp = use->cost_map + i;
5127 cand = cp->cand;
5128 if (!cand)
5129 continue;
5131 /* Already tried this. */
5132 if (cand->important)
5133 continue;
5135 if (iv_ca_cand_used_p (ivs, cand))
5136 continue;
5138 act_delta = NULL;
5139 iv_ca_set_cp (data, ivs, use, cp);
5140 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5141 iv_ca_set_no_cp (data, ivs, use);
5142 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5143 cp, act_delta);
5145 if (act_cost < best_cost)
5147 best_cost = act_cost;
5149 if (best_delta)
5150 iv_ca_delta_free (&best_delta);
5151 best_delta = act_delta;
5153 else
5154 iv_ca_delta_free (&act_delta);
5158 iv_ca_delta_commit (data, ivs, best_delta, true);
5159 iv_ca_delta_free (&best_delta);
5161 return (best_cost != INFTY);
5164 /* Finds an initial assignment of candidates to uses. */
5166 static struct iv_ca *
5167 get_initial_solution (struct ivopts_data *data)
5169 struct iv_ca *ivs = iv_ca_new (data);
5170 unsigned i;
5172 for (i = 0; i < n_iv_uses (data); i++)
5173 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5175 iv_ca_free (&ivs);
5176 return NULL;
5179 return ivs;
5182 /* Tries to improve the set of induction variables IVS. */
5184 static bool
5185 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5187 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5188 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5189 struct iv_cand *cand;
5191 /* Try extending the set of induction variables by one. */
5192 for (i = 0; i < n_iv_cands (data); i++)
5194 cand = iv_cand (data, i);
5196 if (iv_ca_cand_used_p (ivs, cand))
5197 continue;
5199 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5200 if (!act_delta)
5201 continue;
5203 /* If we successfully added the candidate and the set is small enough,
5204 try optimizing it by removing other candidates. */
5205 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5207 iv_ca_delta_commit (data, ivs, act_delta, true);
5208 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5209 iv_ca_delta_commit (data, ivs, act_delta, false);
5210 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5213 if (acost < best_cost)
5215 best_cost = acost;
5216 iv_ca_delta_free (&best_delta);
5217 best_delta = act_delta;
5219 else
5220 iv_ca_delta_free (&act_delta);
5223 if (!best_delta)
5225 /* Try removing the candidates from the set instead. */
5226 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5228 /* Nothing more we can do. */
5229 if (!best_delta)
5230 return false;
5233 iv_ca_delta_commit (data, ivs, best_delta, true);
5234 gcc_assert (best_cost == iv_ca_cost (ivs));
5235 iv_ca_delta_free (&best_delta);
5236 return true;
5239 /* Attempts to find the optimal set of induction variables. We use a simple
5240 greedy heuristic -- we try to replace at most one candidate in the selected
5241 solution and remove the unused ivs while this improves the cost. */
5243 static struct iv_ca *
5244 find_optimal_iv_set (struct ivopts_data *data)
5246 unsigned i;
5247 struct iv_ca *set;
5248 struct iv_use *use;
5250 /* Get the initial solution. */
5251 set = get_initial_solution (data);
5252 if (!set)
5254 if (dump_file && (dump_flags & TDF_DETAILS))
5255 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5256 return NULL;
5259 if (dump_file && (dump_flags & TDF_DETAILS))
5261 fprintf (dump_file, "Initial set of candidates:\n");
5262 iv_ca_dump (data, dump_file, set);
5265 while (try_improve_iv_set (data, set))
5267 if (dump_file && (dump_flags & TDF_DETAILS))
5269 fprintf (dump_file, "Improved to:\n");
5270 iv_ca_dump (data, dump_file, set);
5274 if (dump_file && (dump_flags & TDF_DETAILS))
5275 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5277 for (i = 0; i < n_iv_uses (data); i++)
5279 use = iv_use (data, i);
5280 use->selected = iv_ca_cand_for_use (set, use)->cand;
5283 return set;
5286 /* Creates a new induction variable corresponding to CAND. */
5288 static void
5289 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5291 block_stmt_iterator incr_pos;
5292 tree base;
5293 bool after = false;
5295 if (!cand->iv)
5296 return;
5298 switch (cand->pos)
5300 case IP_NORMAL:
5301 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5302 break;
5304 case IP_END:
5305 incr_pos = bsi_last (ip_end_pos (data->current_loop));
5306 after = true;
5307 break;
5309 case IP_ORIGINAL:
5310 /* Mark that the iv is preserved. */
5311 name_info (data, cand->var_before)->preserve_biv = true;
5312 name_info (data, cand->var_after)->preserve_biv = true;
5314 /* Rewrite the increment so that it uses var_before directly. */
5315 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5317 return;
5320 gimple_add_tmp_var (cand->var_before);
5321 add_referenced_var (cand->var_before);
5323 base = unshare_expr (cand->iv->base);
5325 create_iv (base, unshare_expr (cand->iv->step),
5326 cand->var_before, data->current_loop,
5327 &incr_pos, after, &cand->var_before, &cand->var_after);
5330 /* Creates new induction variables described in SET. */
5332 static void
5333 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5335 unsigned i;
5336 struct iv_cand *cand;
5337 bitmap_iterator bi;
5339 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5341 cand = iv_cand (data, i);
5342 create_new_iv (data, cand);
5346 /* Removes statement STMT (a real statement or a phi node). If
5347 INCLUDING_DEFINED_NAME is true, also remove the ssa name defined by the statement. */
5349 static void
5350 remove_statement (tree stmt, bool including_defined_name)
5352 if (TREE_CODE (stmt) == PHI_NODE)
5354 remove_phi_node (stmt, NULL_TREE, including_defined_name);
5356 else
5358 block_stmt_iterator bsi = bsi_for_stmt (stmt);
5360 bsi_remove (&bsi, true);
5364 /* Rewrites USE (the definition of an iv used in a nonlinear expression)
5365 using candidate CAND. */
5367 static void
5368 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5369 struct iv_use *use, struct iv_cand *cand)
5371 tree comp;
5372 tree op, stmts, tgt, ass;
5373 block_stmt_iterator bsi, pbsi;
5375 /* An important special case -- if we are asked to express the value of
5376 the original iv by itself, just exit; there is no need to
5377 introduce a new computation (that might also need casting the
5378 variable to unsigned and back). */
5379 if (cand->pos == IP_ORIGINAL
5380 && cand->incremented_at == use->stmt)
5382 tree step, ctype, utype;
5383 enum tree_code incr_code = PLUS_EXPR;
5385 gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
5386 gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);
5388 step = cand->iv->step;
5389 ctype = TREE_TYPE (step);
5390 utype = TREE_TYPE (cand->var_after);
5391 if (TREE_CODE (step) == NEGATE_EXPR)
5393 incr_code = MINUS_EXPR;
5394 step = TREE_OPERAND (step, 0);
5397 /* Check whether we may leave the computation unchanged.
5398 This is the case only if it does not rely on other
5399 computations in the loop -- otherwise, the computation
5400 we rely upon may be removed in remove_unused_ivs,
5401 thus leading to an ICE. */
5402 op = GIMPLE_STMT_OPERAND (use->stmt, 1);
5403 if (TREE_CODE (op) == PLUS_EXPR
5404 || TREE_CODE (op) == MINUS_EXPR)
5406 if (TREE_OPERAND (op, 0) == cand->var_before)
5407 op = TREE_OPERAND (op, 1);
5408 else if (TREE_CODE (op) == PLUS_EXPR
5409 && TREE_OPERAND (op, 1) == cand->var_before)
5410 op = TREE_OPERAND (op, 0);
5411 else
5412 op = NULL_TREE;
5414 else
5415 op = NULL_TREE;
5417 if (op
5418 && (TREE_CODE (op) == INTEGER_CST
5419 || operand_equal_p (op, step, 0)))
5420 return;
5422 /* Otherwise, add the necessary computations to express
5423 the iv. */
5424 op = fold_convert (ctype, cand->var_before);
5425 comp = fold_convert (utype,
5426 build2 (incr_code, ctype, op,
5427 unshare_expr (step)));
5429 else
5430 comp = get_computation (data->current_loop, use, cand);
5432 switch (TREE_CODE (use->stmt))
5434 case PHI_NODE:
5435 tgt = PHI_RESULT (use->stmt);
5437 /* If we should keep the biv, do not replace it. */
5438 if (name_info (data, tgt)->preserve_biv)
5439 return;
5441 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5442 while (!bsi_end_p (pbsi)
5443 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5445 bsi = pbsi;
5446 bsi_next (&pbsi);
5448 break;
5450 case GIMPLE_MODIFY_STMT:
5451 tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
5452 bsi = bsi_for_stmt (use->stmt);
5453 break;
5455 default:
5456 gcc_unreachable ();
5459 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5461 if (TREE_CODE (use->stmt) == PHI_NODE)
5463 if (stmts)
5464 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5465 ass = build2_gimple (GIMPLE_MODIFY_STMT, tgt, op);
5466 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5467 remove_statement (use->stmt, false);
5468 SSA_NAME_DEF_STMT (tgt) = ass;
5470 else
5472 if (stmts)
5473 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5474 GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
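
/* Illustrative example (hypothetical GIMPLE names, for exposition only)
   of the IP_ORIGINAL special case above.  An original biv increment like

     i_7 = i_3 + 1;

   is left untouched, since the added operand is a constant.  Only when
   the increment relies on another computation from the loop body is the
   statement rewritten in terms of var_before and the candidate's step,
   so that remove_unused_ivs cannot delete a definition the increment
   would still depend on.  */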
5478 /* Replaces the ssa name at *IDX by its basic variable. Callback
5479 for for_each_index. */
5481 static bool
5482 idx_remove_ssa_names (tree base, tree *idx,
5483 void *data ATTRIBUTE_UNUSED)
5485 tree *op;
5487 if (TREE_CODE (*idx) == SSA_NAME)
5488 *idx = SSA_NAME_VAR (*idx);
5490 if (TREE_CODE (base) == ARRAY_REF)
5492 op = &TREE_OPERAND (base, 2);
5493 if (*op
5494 && TREE_CODE (*op) == SSA_NAME)
5495 *op = SSA_NAME_VAR (*op);
5496 op = &TREE_OPERAND (base, 3);
5497 if (*op
5498 && TREE_CODE (*op) == SSA_NAME)
5499 *op = SSA_NAME_VAR (*op);
5502 return true;
5505 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5507 static tree
5508 unshare_and_remove_ssa_names (tree ref)
5510 ref = unshare_expr (ref);
5511 for_each_index (&ref, idx_remove_ssa_names, NULL);
5513 return ref;
5516 /* Extract the alias analysis info for the memory reference REF. There are
5517 several ways this information may be stored, and its precise semantics
5518 depend on the type of the reference, but there is always one _DECL node
5519 hidden somewhere that is used to determine the set of virtual operands
5520 for the reference. The code below deciphers this jungle and extracts
5521 that single useful piece of information. */
5523 static tree
5524 get_ref_tag (tree ref, tree orig)
5526 tree var = get_base_address (ref);
5527 tree aref = NULL_TREE, tag, sv;
5528 HOST_WIDE_INT offset, size, maxsize;
5530 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5532 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5533 if (ref)
5534 break;
5537 if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5538 return unshare_expr (sv);
5540 if (!var)
5541 return NULL_TREE;
5543 if (TREE_CODE (var) == INDIRECT_REF)
5545 /* If the base is a dereference of a pointer, first check its name memory
5546 tag. If it does not have one, use its symbol memory tag. */
5547 var = TREE_OPERAND (var, 0);
5548 if (TREE_CODE (var) != SSA_NAME)
5549 return NULL_TREE;
5551 if (SSA_NAME_PTR_INFO (var))
5553 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5554 if (tag)
5555 return tag;
5558 var = SSA_NAME_VAR (var);
5559 tag = var_ann (var)->symbol_mem_tag;
5560 gcc_assert (tag != NULL_TREE);
5561 return tag;
5563 else
5565 if (!DECL_P (var))
5566 return NULL_TREE;
5568 tag = var_ann (var)->symbol_mem_tag;
5569 if (tag)
5570 return tag;
5572 return var;
5576 /* Copies the reference information from OLD_REF to NEW_REF. */
5578 static void
5579 copy_ref_info (tree new_ref, tree old_ref)
5581 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5582 copy_mem_ref_info (new_ref, old_ref);
5583 else
5585 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5586 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5590 /* Rewrites USE (address that is an iv) using candidate CAND. */
5592 static void
5593 rewrite_use_address (struct ivopts_data *data,
5594 struct iv_use *use, struct iv_cand *cand)
5596 struct affine_tree_combination aff;
5597 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5598 tree ref;
5600 get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5601 unshare_aff_combination (&aff);
5603 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5604 copy_ref_info (ref, *use->op_p);
5605 *use->op_p = ref;
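
/* Illustrative example (hypothetical names, for exposition only): an
   address use taken from

     ... = a[i_3];

   may be rewritten, once a pointer-style candidate is chosen, into a
   TARGET_MEM_REF of roughly the form

     MEM[symbol: a, index: ivtmp.5, step: 4]

   which create_mem_ref builds from the affine combination computed for
   the chosen candidate.  */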
5608 /* Rewrites USE (a condition in which one of the operands is an iv) using
5609 candidate CAND. */
5611 static void
5612 rewrite_use_compare (struct ivopts_data *data,
5613 struct iv_use *use, struct iv_cand *cand)
5615 tree comp;
5616 tree *op_p, cond, op, stmts, bound;
5617 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5618 enum tree_code compare;
5619 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5621 bound = cp->value;
5622 if (bound)
5624 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5625 tree var_type = TREE_TYPE (var);
5627 compare = iv_elimination_compare (data, use);
5628 bound = fold_convert (var_type, bound);
5629 op = force_gimple_operand (unshare_expr (bound), &stmts,
5630 true, NULL_TREE);
5632 if (stmts)
5633 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5635 *use->op_p = build2 (compare, boolean_type_node, var, op);
5636 update_stmt (use->stmt);
5637 return;
5640 /* The induction variable elimination failed; just express the original
5641 giv. */
5642 comp = get_computation (data->current_loop, use, cand);
5644 cond = *use->op_p;
5645 op_p = &TREE_OPERAND (cond, 0);
5646 if (TREE_CODE (*op_p) != SSA_NAME
5647 || zero_p (get_iv (data, *op_p)->step))
5648 op_p = &TREE_OPERAND (cond, 1);
5650 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5651 if (stmts)
5652 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5654 *op_p = op;
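
/* Illustrative example (hypothetical names, for exposition only) of the
   successful elimination branch above: an exit test such as

     if (i_3 < 100)

   may be rewritten, using a pointer candidate with step 4, as

     if (p_7 != base_addr + 400)

   where the bound comes from CP->value and the comparison code from
   iv_elimination_compare.  */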
5657 /* Rewrites USE using candidate CAND. */
5659 static void
5660 rewrite_use (struct ivopts_data *data,
5661 struct iv_use *use, struct iv_cand *cand)
5663 switch (use->type)
5665 case USE_NONLINEAR_EXPR:
5666 rewrite_use_nonlinear_expr (data, use, cand);
5667 break;
5669 case USE_ADDRESS:
5670 rewrite_use_address (data, use, cand);
5671 break;
5673 case USE_COMPARE:
5674 rewrite_use_compare (data, use, cand);
5675 break;
5677 default:
5678 gcc_unreachable ();
5680 mark_new_vars_to_rename (use->stmt);
5683 /* Rewrite the uses using the selected induction variables. */
5685 static void
5686 rewrite_uses (struct ivopts_data *data)
5688 unsigned i;
5689 struct iv_cand *cand;
5690 struct iv_use *use;
5692 for (i = 0; i < n_iv_uses (data); i++)
5694 use = iv_use (data, i);
5695 cand = use->selected;
5696 gcc_assert (cand);
5698 rewrite_use (data, use, cand);
5702 /* Removes the ivs that are not used after rewriting. */
5704 static void
5705 remove_unused_ivs (struct ivopts_data *data)
5707 unsigned j;
5708 bitmap_iterator bi;
5710 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5712 struct version_info *info;
5714 info = ver_info (data, j);
5715 if (info->iv
5716 && !zero_p (info->iv->step)
5717 && !info->inv_id
5718 && !info->iv->have_use_for
5719 && !info->preserve_biv)
5720 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5724 /* Frees data allocated by the optimization of a single loop. */
5726 static void
5727 free_loop_data (struct ivopts_data *data)
5729 unsigned i, j;
5730 bitmap_iterator bi;
5731 tree obj;
5733 htab_empty (data->niters);
5735 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5737 struct version_info *info;
5739 info = ver_info (data, i);
5740 if (info->iv)
5741 free (info->iv);
5742 info->iv = NULL;
5743 info->has_nonlin_use = false;
5744 info->preserve_biv = false;
5745 info->inv_id = 0;
5747 bitmap_clear (data->relevant);
5748 bitmap_clear (data->important_candidates);
5750 for (i = 0; i < n_iv_uses (data); i++)
5752 struct iv_use *use = iv_use (data, i);
5754 free (use->iv);
5755 BITMAP_FREE (use->related_cands);
5756 for (j = 0; j < use->n_map_members; j++)
5757 if (use->cost_map[j].depends_on)
5758 BITMAP_FREE (use->cost_map[j].depends_on);
5759 free (use->cost_map);
5760 free (use);
5762 VEC_truncate (iv_use_p, data->iv_uses, 0);
5764 for (i = 0; i < n_iv_cands (data); i++)
5766 struct iv_cand *cand = iv_cand (data, i);
5768 if (cand->iv)
5769 free (cand->iv);
5770 if (cand->depends_on)
5771 BITMAP_FREE (cand->depends_on);
5772 free (cand);
5774 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5776 if (data->version_info_size < num_ssa_names)
5778 data->version_info_size = 2 * num_ssa_names;
5779 free (data->version_info);
5780 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5783 data->max_inv_id = 0;
5785 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5786 SET_DECL_RTL (obj, NULL_RTX);
5788 VEC_truncate (tree, decl_rtl_to_reset, 0);
5791 /* Finalizes the data structures used by the iv optimization pass. */
5794 static void
5795 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5797 free_loop_data (data);
5798 free (data->version_info);
5799 BITMAP_FREE (data->relevant);
5800 BITMAP_FREE (data->important_candidates);
5801 htab_delete (data->niters);
5803 VEC_free (tree, heap, decl_rtl_to_reset);
5804 VEC_free (iv_use_p, heap, data->iv_uses);
5805 VEC_free (iv_cand_p, heap, data->iv_candidates);
5808 /* Optimizes the LOOP. Returns true if anything changed. */
5810 static bool
5811 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5813 bool changed = false;
5814 struct iv_ca *iv_ca;
5815 edge exit;
5817 data->current_loop = loop;
5819 if (dump_file && (dump_flags & TDF_DETAILS))
5821 fprintf (dump_file, "Processing loop %d\n", loop->num);
5823 exit = single_dom_exit (loop);
5824 if (exit)
5826 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5827 exit->src->index, exit->dest->index);
5828 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5829 fprintf (dump_file, "\n");
5832 fprintf (dump_file, "\n");
5835 /* For each ssa name determines whether it behaves as an induction variable
5836 in some loop. */
5837 if (!find_induction_variables (data))
5838 goto finish;
5840 /* Finds interesting uses (item 1). */
5841 find_interesting_uses (data);
5842 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5843 goto finish;
5845 /* Finds candidates for the induction variables (item 2). */
5846 find_iv_candidates (data);
5848 /* Calculates the costs (item 3, part 1). */
5849 determine_use_iv_costs (data);
5850 determine_iv_costs (data);
5851 determine_set_costs (data);
5853 /* Find the optimal set of induction variables (item 3, part 2). */
5854 iv_ca = find_optimal_iv_set (data);
5855 if (!iv_ca)
5856 goto finish;
5857 changed = true;
5859 /* Create the new induction variables (item 4, part 1). */
5860 create_new_ivs (data, iv_ca);
5861 iv_ca_free (&iv_ca);
5863 /* Rewrite the uses (item 4, part 2). */
5864 rewrite_uses (data);
5866 /* Remove the ivs that are unused after rewriting. */
5867 remove_unused_ivs (data);
5869 /* We have changed the structure of induction variables; it might happen
5870 that definitions in the scev database refer to some of them that were
5871 eliminated. */
5872 scev_reset ();
5874 finish:
5875 free_loop_data (data);
5877 return changed;
5880 /* Main entry point. Optimizes induction variables in loops. */
5882 void
5883 tree_ssa_iv_optimize (void)
5885 struct loop *loop;
5886 struct ivopts_data data;
5887 loop_iterator li;
5889 tree_ssa_iv_optimize_init (&data);
5891 /* Optimize the loops starting with the innermost ones. */
5892 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5894 if (dump_file && (dump_flags & TDF_DETAILS))
5895 flow_loop_dump (loop, dump_file, NULL, 1);
5897 tree_ssa_iv_optimize_loop (&data, loop);
5900 tree_ssa_iv_optimize_finalize (&data);