gcc/tree-ssa-loop-ivopts.c

/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
	 induction variable in the set and adds its cost to the sum.  The cost
	 reflects the time spent on modifying the induction variable's value
	 to be usable for the given purpose (adding base and offset for
	 arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, and it might give better performance and enable us to decide
   costs more precisely, but getting all the interactions right would be
   complicated.  */
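
/* Illustrative example (added commentary, not part of the original
   description): on a loop such as

     for (i = 0; i < n; i++)
       sum += a[i];

   strength reduction may replace the "base of a + i * sizeof (*a)"
   address computation with a pointer that is itself an induction
   variable, giving roughly

     for (p = &a[0]; p != &a[n]; p++)
       sum += *p;

   and induction variable elimination may then remove "i" completely by
   rewriting the exit test in terms of "p".  */
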
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "varray.h"
#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "params.h"
#include "langhooks.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  unsigned inv_id;	/* Id of an invariant.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  unsigned cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  tree stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  enum iv_position pos;	/* Where it is computed.  */
  tree incremented_at;	/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* Numbers of iterations for all exits of the current loop.  */
  htab_t niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  unsigned cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  unsigned cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = loop->single_exit;

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_generic_expr (file, use->stmt, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Checks whether there exists number X such that X * B = A, counting modulo
   2^BITS.  */

static bool
divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
	HOST_WIDE_INT *x)
{
  unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  unsigned HOST_WIDE_INT inv, ex, val;
  unsigned i;

  a &= mask;
  b &= mask;

  /* First divide the whole equation by 2 as long as possible.  */
  while (!(a & 1) && !(b & 1))
    {
      a >>= 1;
      b >>= 1;
      bits--;
      mask >>= 1;
    }

  if (!(b & 1))
    {
      /* If b is still even, a is odd and there is no such x.  */
      return false;
    }

  /* Find the inverse of b.  We compute it as
     b^(2^(bits - 1) - 1) (mod 2^bits).  */
  inv = 1;
  ex = b;
  for (i = 0; i < bits - 1; i++)
    {
      inv = (inv * ex) & mask;
      ex = (ex * ex) & mask;
    }

  val = (a * inv) & mask;

  gcc_assert (((val * b) & mask) == a);

  if ((val >> (bits - 1)) & 1)
    val |= ~mask;

  *x = val;

  return true;
}
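
/* A worked example of the computation above (added for illustration):
   with BITS = 8, A = 9 and B = 3, the inverse of 3 modulo 2^8 is
   3^(2^7 - 1) mod 256 = 171 (indeed 3 * 171 = 513 = 2 * 256 + 1), so
   VAL = (9 * 171) & 0xff = 3, and the assertion 3 * 3 = 9 holds.  The
   final step sign-extends VAL, so e.g. A = 253, B = 1 yields X = -3,
   since -3 * 1 is congruent to 253 modulo 256.  */
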
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
{
  basic_block cand_bb = bb_for_stmt (cand->incremented_at);
  basic_block stmt_bb = bb_for_stmt (stmt);
  block_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  */
  for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
    {
      if (bsi_stmt (bsi) == cand->incremented_at)
	return false;
      if (bsi_stmt (bsi) == stmt)
	return true;
    }
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
      return stmt_after_ip_original_pos (cand, stmt);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class class;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  class = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  switch (class)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Element of the table in which we cache the numbers of iterations obtained
   from exits of the loop.  */

struct nfe_cache_elt
{
  /* The edge for which the number of iterations is cached.  */
  edge exit;

  /* Number of iterations corresponding to this exit, or NULL if it cannot be
     determined.  */
  tree niter;
};

/* Hash function for nfe_cache_elt E.  */

static hashval_t
nfe_hash (const void *e)
{
  const struct nfe_cache_elt *elt = e;

  return htab_hash_pointer (elt->exit);
}

/* Equality function for nfe_cache_elt E1 and edge E2.  */

static int
nfe_eq (const void *e1, const void *e2)
{
  const struct nfe_cache_elt *elt1 = e1;

  return elt1->exit == e2;
}

/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct nfe_cache_elt *nfe_desc;
  struct tree_niter_desc desc;
  PTR *slot;

  slot = htab_find_slot_with_hash (data->niters, exit,
				   htab_hash_pointer (exit),
				   INSERT);

  if (!*slot)
    {
      nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
      nfe_desc->exit = exit;

      /* Try to determine number of iterations.  We must know it
	 unconditionally (i.e., without possibility of # of iterations
	 being zero).  Also, we cannot safely work with ssa names that
	 appear in phi nodes on abnormal edges, so that we do not create
	 overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
				     exit, &desc, true)
	  && zero_p (desc.may_be_zero)
	  && !contains_abnormal_ssa_name_p (desc.niter))
	nfe_desc->niter = desc.niter;
      else
	nfe_desc->niter = NULL_TREE;

      /* Store the new element so that the cache actually caches.  */
      *slot = nfe_desc;
    }
  else
    nfe_desc = *slot;

  return nfe_desc->niter;
}

/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = htab_create (10, nfe_hash, nfe_eq, free);
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj, op0, op1;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (TREE_CODE (expr) == NOP_EXPR
      || TREE_CODE (expr) == CONVERT_EXPR)
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = determine_base_object (TREE_OPERAND (expr, 0));
      op1 = determine_base_object (TREE_OPERAND (expr, 1));

      if (!op1)
	return op0;

      if (!op0)
	return (code == PLUS_EXPR
		? op1
		: fold_build1 (NEGATE_EXPR, ptr_type_node, op1));

      return fold_build2 (code, ptr_type_node, op0, op1);

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
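
/* Illustrative example (added commentary): for an expression such as
   "&a[0] + 4 * i_1" the PLUS_EXPR case above combines the base object
   of "&a[0]", which is "&a", with the non-pointer operand "4 * i_1",
   which contributes NULL_TREE, so the whole expression is deemed to
   point into "a".  Roughly speaking, the base object recorded in each
   iv later restricts which candidates may be used to express a given
   address use.  */
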
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);

  if (step && integer_zerop (step))
    step = NULL_TREE;

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;

  if (!name_info (data, var)->iv)
    {
      bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, NULL_TREE);
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (tree phi)
{
  struct loop *loop = bb_for_stmt (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, phi, name, &iv, true))
    return NULL_TREE;

  return (zero_p (iv.step) ? NULL_TREE : iv.step);
}
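
/* Illustrative example (added commentary, ssa names invented): in
   GIMPLE such as

     # i_1 = PHI <0(preheader), i_2(latch)>
     ...
     i_2 = i_1 + 1;

   the phi node defines a biv with base 0 and step 1, so
   determine_biv_step returns the constant 1, while a phi whose latch
   value is not an affine function of the phi result yields
   NULL_TREE.  */
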
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  tree phi, step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	step = fold_convert (type, step);

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  tree phi, var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  lhs = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, tree stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  block_stmt_iterator bsi;

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    find_givs_in_stmt (data, bsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter, TDF_SLIM);
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    tree stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  tree stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (zero_p (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (TREE_CODE (stmt) == PHI_NODE
	      || TREE_CODE (stmt) == MODIFY_EXPR);

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Checks whether the condition *COND_P in STMT is interesting
   and if so, records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
{
  tree *op0_p;
  tree *op1_p;
  struct iv *iv0 = NULL, *iv1 = NULL, *civ;
  struct iv const_iv;
  tree zero = integer_zero_node;

  const_iv.step = NULL_TREE;

  if (TREE_CODE (*cond_p) != SSA_NAME
      && !COMPARISON_CLASS_P (*cond_p))
    return;

  if (TREE_CODE (*cond_p) == SSA_NAME)
    {
      op0_p = cond_p;
      op1_p = &zero;
    }
  else
    {
      op0_p = &TREE_OPERAND (*cond_p, 0);
      op1_p = &TREE_OPERAND (*cond_p, 1);
    }

  if (TREE_CODE (*op0_p) == SSA_NAME)
    iv0 = get_iv (data, *op0_p);
  else
    iv0 = &const_iv;

  if (TREE_CODE (*op1_p) == SSA_NAME)
    iv1 = get_iv (data, *op1_p);
  else
    iv1 = &const_iv;

  if (/* When comparing with a non-invariant value, we cannot do any sensible
	 induction variable elimination.  */
      (!iv0 || !iv1)
      /* Eliminating condition based on two ivs would be nontrivial.
	 ??? TODO -- it is not really important to handle this case.  */
      || (!zero_p (iv0->step) && !zero_p (iv1->step)))
    {
      find_interesting_uses_op (data, *op0_p);
      find_interesting_uses_op (data, *op1_p);
      return;
    }

  if (zero_p (iv0->step) && zero_p (iv1->step))
    {
      /* If both are invariants, this is a work for unswitching.  */
      return;
    }

  civ = XNEW (struct iv);
  *civ = zero_p (iv0->step) ? *iv1: *iv0;
  record_use (data, cond_p, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_CODE_LENGTH (TREE_CODE (expr));
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  tree stmt;
  tree *step_p;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (!iv->step)
    return true;

  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);

  if (!*dta->step_p)
    *dta->step_p = step;
  else
    *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *data)
{
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* Returns true if memory reference REF may be unaligned.  */

static bool
may_be_unaligned_p (tree ref)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode
      && (base_align < GET_MODE_ALIGNMENT (mode)
	  || bitpos % GET_MODE_ALIGNMENT (mode) != 0
	  || bitpos % BITS_PER_UNIT != 0))
    return true;

  return false;
}

/* Return true if EXPR may be non-addressable.  */

static bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      return AGGREGATE_TYPE_P (TREE_TYPE (expr))
	     && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = *op_p, step = NULL;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (stmt_ann (stmt)->has_volatile_ops)
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  if (may_be_nonaddressable_p (base))
    goto fail;

  if (STRICT_ALIGNMENT
      && may_be_unaligned_p (base))
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      if (step)
		step = fold_build2 (PLUS_EXPR, type, step, astep);
	      else
		step = astep;
	    }
	}

      if (zero_p (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step_p = &step;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || zero_p (step))
	goto fail;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == INDIRECT_REF)
	    *ref = fold_indirect_ref (*ref);
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, tree stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
{
  struct iv *iv;
  tree op, lhs, rhs;
  ssa_op_iter iter;
  use_operand_p use_p;

  find_invariants_stmt (data, stmt);

  if (TREE_CODE (stmt) == COND_EXPR)
    {
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
      return;
    }

  if (TREE_CODE (stmt) == MODIFY_EXPR)
    {
      lhs = TREE_OPERAND (stmt, 0);
      rhs = TREE_OPERAND (stmt, 1);

      if (TREE_CODE (lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, lhs);

	  if (iv && !zero_p (iv->step))
	    return;
	}

      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
	{
	case tcc_comparison:
	  find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
	  return;

	case tcc_reference:
	  find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
	  if (REFERENCE_CLASS_P (lhs))
	    find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
	  return;

	default: ;
	}

      if (REFERENCE_CLASS_P (lhs)
	  && is_gimple_val (rhs))
	{
	  find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
	  find_interesting_uses_op (data, rhs);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (TREE_CODE (stmt) == PHI_NODE
      && bb_for_stmt (stmt) == data->current_loop->header)
    {
      lhs = PHI_RESULT (stmt);
      iv = get_iv (data, lhs);

      if (iv && !zero_p (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  tree phi, def;

  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
    {
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  block_stmt_iterator bsi;
  tree phi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
	find_interesting_uses_stmt (data, phi);
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	find_interesting_uses_stmt (data, bsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || zero_p (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (zero_p (op1))
	expr = op0;
      else if (zero_p (op0))
	{
	  if (code == PLUS_EXPR)
	    expr = op1;
	  else
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && zero_p (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
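
/* Illustrative example (added commentary): stripping "p + 6" yields
   OFFSET = 6 and returns "p", and stripping "&a[4]", assuming 4-byte
   array elements, yields OFFSET = 16 and returns the address of the
   array itself, effectively "&a[0]".  Callers such as
   add_iv_value_candidates use this to propose an iv candidate without
   the constant displacement.  */
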
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
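
/* Illustrative example (added commentary): on a typical 32-bit target,
   generic_type_for maps both "int *" and "int" to "unsigned int",
   while "unsigned int" is returned unchanged; candidates sharing the
   same generic type can then serve uses of all these original types
   without introducing signed overflow.  */
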
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to which we should store it.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}

/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, tree incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  if (step)
	    step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at)
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (!operand_equal_p (base, cand->iv->base, 0))
	continue;

      if (zero_p (cand->iv->step))
	{
	  if (zero_p (step))
	    break;
	}
      else
	{
	  if (step && operand_equal_p (step, cand->iv->step, 0))
	    break;
	}
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}

/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting the latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to the scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}

/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
	       tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
}

/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
				     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
		 true, NULL);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}

/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  tree phi, def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  add_candidate (data,
		 build_int_cst (TREE_TYPE (iv->base), 0),
		 iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (TREE_CODE (phi) == PHI_NODE)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
			      iv->base, iv->step, true, IP_ORIGINAL, NULL,
			      SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}

/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !zero_p (iv->step))
	add_old_iv_candidates (data, iv);
    }
}

/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
			 struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
		 iv->step, true, use);

  /* Third, try removing the constant offset.  */
  base = strip_offset (iv->base, &offset);
  if (offset)
    add_candidate (data, base, iv->step, false, use);
}

/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
	continue;

      switch (use->type)
	{
	case USE_NONLINEAR_EXPR:
	case USE_COMPARE:
	case USE_ADDRESS:
	  /* Just add the ivs based on the value of the iv used here.  */
	  add_iv_value_candidates (data, use->iv, use);
	  break;

	default:
	  gcc_unreachable ();
	}
    }
}

/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
	bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
				   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
	 so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);
	  BITMAP_FREE (use->related_cands);
	}
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
	bitmap_ior_into (iv_use (data, i)->related_cands,
			 data->important_candidates);
    }
}

/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}

2296 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2297 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2298 we allocate a small hash table for each use. */
2300 static void
2301 alloc_use_cost_map (struct ivopts_data *data)
2303 unsigned i, size, s, j;
2305 for (i = 0; i < n_iv_uses (data); i++)
2307 struct iv_use *use = iv_use (data, i);
2308 bitmap_iterator bi;
2310 if (data->consider_all_candidates)
2311 size = n_iv_cands (data);
2312 else
2314 s = 0;
2315 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2317 s++;
2320 /* Round up to the nearest power of two, so that computing the remainder modulo it is fast. */
2321 for (size = 1; size < s; size <<= 1)
2322 continue;
2325 use->n_map_members = size;
2326 use->cost_map = XCNEWVEC (struct cost_pair, size);
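/* E.g. a use related to 5 candidates gets a table of size 8; an entry
   for candidate C then lives at index C & (size - 1), since for a
   power-of-two size, C % size == (C & (size - 1)).  */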
2330 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2331 on invariants DEPENDS_ON and that the value used in expressing it
2332 is VALUE. */
2334 static void
2335 set_use_iv_cost (struct ivopts_data *data,
2336 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2337 bitmap depends_on, tree value)
2339 unsigned i, s;
2341 if (cost == INFTY)
2343 BITMAP_FREE (depends_on);
2344 return;
2347 if (data->consider_all_candidates)
2349 use->cost_map[cand->id].cand = cand;
2350 use->cost_map[cand->id].cost = cost;
2351 use->cost_map[cand->id].depends_on = depends_on;
2352 use->cost_map[cand->id].value = value;
2353 return;
2356 /* n_map_members is a power of two, so this computes modulo. */
2357 s = cand->id & (use->n_map_members - 1);
2358 for (i = s; i < use->n_map_members; i++)
2359 if (!use->cost_map[i].cand)
2360 goto found;
2361 for (i = 0; i < s; i++)
2362 if (!use->cost_map[i].cand)
2363 goto found;
2365 gcc_unreachable ();
2367 found:
2368 use->cost_map[i].cand = cand;
2369 use->cost_map[i].cost = cost;
2370 use->cost_map[i].depends_on = depends_on;
2371 use->cost_map[i].value = value;
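/* The table is open-addressed with linear probing: we start at
   cand->id modulo the table size and take the first free slot,
   wrapping around once. Since the table was sized to hold all the
   related candidates, a free slot must exist, hence the
   gcc_unreachable above. get_use_iv_cost below retraces the same
   probe sequence when looking a pair up.  */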
2374 /* Gets cost of (USE, CANDIDATE) pair. */
2376 static struct cost_pair *
2377 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2378 struct iv_cand *cand)
2380 unsigned i, s;
2381 struct cost_pair *ret;
2383 if (!cand)
2384 return NULL;
2386 if (data->consider_all_candidates)
2388 ret = use->cost_map + cand->id;
2389 if (!ret->cand)
2390 return NULL;
2392 return ret;
2395 /* n_map_members is a power of two, so this computes modulo. */
2396 s = cand->id & (use->n_map_members - 1);
2397 for (i = s; i < use->n_map_members; i++)
2398 if (use->cost_map[i].cand == cand)
2399 return use->cost_map + i;
2401 for (i = 0; i < s; i++)
2402 if (use->cost_map[i].cand == cand)
2403 return use->cost_map + i;
2405 return NULL;
2408 /* Returns an estimate of the cost of computing the insn sequence SEQ. */
2410 static unsigned
2411 seq_cost (rtx seq)
2413 unsigned cost = 0;
2414 rtx set;
2416 for (; seq; seq = NEXT_INSN (seq))
2418 set = single_set (seq);
2419 if (set)
2420 cost += rtx_cost (set, SET);
2421 else
2422 cost++;
2425 return cost;
2428 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2429 static rtx
2430 produce_memory_decl_rtl (tree obj, int *regno)
2432 rtx x;
2434 gcc_assert (obj);
2435 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2437 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2438 x = gen_rtx_SYMBOL_REF (Pmode, name);
2440 else
2441 x = gen_raw_REG (Pmode, (*regno)++);
2443 return gen_rtx_MEM (DECL_MODE (obj), x);
2446 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2447 walk_tree. DATA contains the actual fake register number. */
2449 static tree
2450 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2452 tree obj = NULL_TREE;
2453 rtx x = NULL_RTX;
2454 int *regno = data;
2456 switch (TREE_CODE (*expr_p))
2458 case ADDR_EXPR:
2459 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2460 handled_component_p (*expr_p);
2461 expr_p = &TREE_OPERAND (*expr_p, 0))
2462 continue;
2463 obj = *expr_p;
2464 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2465 x = produce_memory_decl_rtl (obj, regno);
2466 break;
2468 case SSA_NAME:
2469 *ws = 0;
2470 obj = SSA_NAME_VAR (*expr_p);
2471 if (!DECL_RTL_SET_P (obj))
2472 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2473 break;
2475 case VAR_DECL:
2476 case PARM_DECL:
2477 case RESULT_DECL:
2478 *ws = 0;
2479 obj = *expr_p;
2481 if (DECL_RTL_SET_P (obj))
2482 break;
2484 if (DECL_MODE (obj) == BLKmode)
2485 x = produce_memory_decl_rtl (obj, regno);
2486 else
2487 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2489 break;
2491 default:
2492 break;
2495 if (x)
2497 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2498 SET_DECL_RTL (obj, x);
2501 return NULL_TREE;
2504 /* Determines cost of the computation of EXPR. */
2506 static unsigned
2507 computation_cost (tree expr)
2509 rtx seq, rslt;
2510 tree type = TREE_TYPE (expr);
2511 unsigned cost;
2512 /* Avoid using hard regs in ways which may be unsupported. */
2513 int regno = LAST_VIRTUAL_REGISTER + 1;
2515 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2516 start_sequence ();
2517 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2518 seq = get_insns ();
2519 end_sequence ();
2521 cost = seq_cost (seq);
2522 if (MEM_P (rslt))
2523 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2525 return cost;
2528 /* Returns variable containing the value of candidate CAND at statement AT. */
2530 static tree
2531 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2533 if (stmt_after_increment (loop, cand, stmt))
2534 return cand->var_after;
2535 else
2536 return cand->var_before;
2539 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2540 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2542 int
2543 tree_int_cst_sign_bit (tree t)
2545 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2546 unsigned HOST_WIDE_INT w;
2548 if (bitno < HOST_BITS_PER_WIDE_INT)
2549 w = TREE_INT_CST_LOW (t);
2550 else
2552 w = TREE_INT_CST_HIGH (t);
2553 bitno -= HOST_BITS_PER_WIDE_INT;
2556 return (w >> bitno) & 1;
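/* For a 32-bit type on a host with a 64-bit HOST_WIDE_INT, bitno is 31
   and the bit is taken from TREE_INT_CST_LOW; e.g. for t == -1 the
   result is 1, for t == 5 it is 0.  */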
2559 /* If we can prove that TOP = cst * BOT for some constant cst,
2560 store cst to MUL and return true. Otherwise return false.
2561 The returned value is always sign-extended, regardless of the
2562 signedness of TOP and BOT. */
2564 static bool
2565 constant_multiple_of (tree top, tree bot, double_int *mul)
2567 tree mby;
2568 enum tree_code code;
2569 double_int res, p0, p1;
2570 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2572 STRIP_NOPS (top);
2573 STRIP_NOPS (bot);
2575 if (operand_equal_p (top, bot, 0))
2577 *mul = double_int_one;
2578 return true;
2581 code = TREE_CODE (top);
2582 switch (code)
2584 case MULT_EXPR:
2585 mby = TREE_OPERAND (top, 1);
2586 if (TREE_CODE (mby) != INTEGER_CST)
2587 return false;
2589 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2590 return false;
2592 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2593 precision);
2594 return true;
2596 case PLUS_EXPR:
2597 case MINUS_EXPR:
2598 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2599 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2600 return false;
2602 if (code == MINUS_EXPR)
2603 p1 = double_int_neg (p1);
2604 *mul = double_int_sext (double_int_add (p0, p1), precision);
2605 return true;
2607 case INTEGER_CST:
2608 if (TREE_CODE (bot) != INTEGER_CST)
2609 return false;
2611 p0 = double_int_sext (tree_to_double_int (top), precision);
2612 p1 = double_int_sext (tree_to_double_int (bot), precision);
2613 if (double_int_zero_p (p1))
2614 return false;
2615 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2616 precision);
2617 return double_int_zero_p (res);
2619 default:
2620 return false;
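/* E.g. (TOP = i * 4, BOT = i) gives MUL = 4, (TOP = 12, BOT = 4) gives
   MUL = 3, and the PLUS/MINUS cases accept forms such as i * 4 - i
   (MUL = 3). Something like i * 4 + 8 over BOT = i is rejected, since
   8 is not a constant multiple of i.  */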
2624 /* Sets COMB to CST. */
2626 static void
2627 aff_combination_const (struct affine_tree_combination *comb, tree type,
2628 unsigned HOST_WIDE_INT cst)
2630 unsigned prec = TYPE_PRECISION (type);
2632 comb->type = type;
2633 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2635 comb->n = 0;
2636 comb->rest = NULL_TREE;
2637 comb->offset = cst & comb->mask;
2640 /* Sets COMB to single element ELT. */
2642 static void
2643 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2645 unsigned prec = TYPE_PRECISION (type);
2647 comb->type = type;
2648 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2650 comb->n = 1;
2651 comb->elts[0] = elt;
2652 comb->coefs[0] = 1;
2653 comb->rest = NULL_TREE;
2654 comb->offset = 0;
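/* A combination represents the value

     offset + coefs[0] * elts[0] + ... + coefs[n-1] * elts[n-1] + rest

   with all arithmetic done modulo 2^prec; MASK is 2^prec - 1, written
   as 2 << (prec - 1) so that the shift stays defined even when prec
   equals HOST_BITS_PER_WIDE_INT (1 << prec would not be). E.g.
   2 * x + 3 * y + 7 has n = 2, coefs = {2, 3}, elts = {x, y},
   offset = 7 and no rest.  */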
2657 /* Scales COMB by SCALE. */
2659 static void
2660 aff_combination_scale (struct affine_tree_combination *comb,
2661 unsigned HOST_WIDE_INT scale)
2663 unsigned i, j;
2665 if (scale == 1)
2666 return;
2668 if (scale == 0)
2670 aff_combination_const (comb, comb->type, 0);
2671 return;
2674 comb->offset = (scale * comb->offset) & comb->mask;
2675 for (i = 0, j = 0; i < comb->n; i++)
2677 comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2678 comb->elts[j] = comb->elts[i];
2679 if (comb->coefs[j] != 0)
2680 j++;
2682 comb->n = j;
2684 if (comb->rest)
2686 if (comb->n < MAX_AFF_ELTS)
2688 comb->coefs[comb->n] = scale;
2689 comb->elts[comb->n] = comb->rest;
2690 comb->rest = NULL_TREE;
2691 comb->n++;
2693 else
2694 comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2695 build_int_cst_type (comb->type, scale));
2699 /* Adds ELT * SCALE to COMB. */
2701 static void
2702 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2703 unsigned HOST_WIDE_INT scale)
2705 unsigned i;
2707 if (scale == 0)
2708 return;
2710 for (i = 0; i < comb->n; i++)
2711 if (operand_equal_p (comb->elts[i], elt, 0))
2713 comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2714 if (comb->coefs[i])
2715 return;
2717 comb->n--;
2718 comb->coefs[i] = comb->coefs[comb->n];
2719 comb->elts[i] = comb->elts[comb->n];
2721 if (comb->rest)
2723 gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2724 comb->coefs[comb->n] = 1;
2725 comb->elts[comb->n] = comb->rest;
2726 comb->rest = NULL_TREE;
2727 comb->n++;
2729 return;
2731 if (comb->n < MAX_AFF_ELTS)
2733 comb->coefs[comb->n] = scale;
2734 comb->elts[comb->n] = elt;
2735 comb->n++;
2736 return;
2739 if (scale == 1)
2740 elt = fold_convert (comb->type, elt);
2741 else
2742 elt = fold_build2 (MULT_EXPR, comb->type,
2743 fold_convert (comb->type, elt),
2744 build_int_cst_type (comb->type, scale));
2746 if (comb->rest)
2747 comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2748 else
2749 comb->rest = elt;
2752 /* Adds COMB2 to COMB1. */
2754 static void
2755 aff_combination_add (struct affine_tree_combination *comb1,
2756 struct affine_tree_combination *comb2)
2758 unsigned i;
2760 comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2761 for (i = 0; i < comb2->n; i++)
2762 aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2763 if (comb2->rest)
2764 aff_combination_add_elt (comb1, comb2->rest, 1);
2767 /* Convert COMB to TYPE. */
2769 static void
2770 aff_combination_convert (tree type, struct affine_tree_combination *comb)
2772 unsigned prec = TYPE_PRECISION (type);
2773 unsigned i;
2775 /* If the precision of both types is the same, it suffices to change the type
2776 of the whole combination -- the elements are allowed to have another type
2777 equivalent w.r.t. STRIP_NOPS. */
2778 if (prec == TYPE_PRECISION (comb->type))
2780 comb->type = type;
2781 return;
2784 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2785 comb->offset = comb->offset & comb->mask;
2787 /* The type of the elements can be different from comb->type only as
2788 much as what STRIP_NOPS would remove. We can just directly cast
2789 to TYPE. */
2790 for (i = 0; i < comb->n; i++)
2791 comb->elts[i] = fold_convert (type, comb->elts[i]);
2792 if (comb->rest)
2793 comb->rest = fold_convert (type, comb->rest);
2795 comb->type = type;
2798 /* Splits EXPR into an affine combination of parts. */
2800 static void
2801 tree_to_aff_combination (tree expr, tree type,
2802 struct affine_tree_combination *comb)
2804 struct affine_tree_combination tmp;
2805 enum tree_code code;
2806 tree cst, core, toffset;
2807 HOST_WIDE_INT bitpos, bitsize;
2808 enum machine_mode mode;
2809 int unsignedp, volatilep;
2811 STRIP_NOPS (expr);
2813 code = TREE_CODE (expr);
2814 switch (code)
2816 case INTEGER_CST:
2817 aff_combination_const (comb, type, int_cst_value (expr));
2818 return;
2820 case PLUS_EXPR:
2821 case MINUS_EXPR:
2822 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2823 tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2824 if (code == MINUS_EXPR)
2825 aff_combination_scale (&tmp, -1);
2826 aff_combination_add (comb, &tmp);
2827 return;
2829 case MULT_EXPR:
2830 cst = TREE_OPERAND (expr, 1);
2831 if (TREE_CODE (cst) != INTEGER_CST)
2832 break;
2833 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2834 aff_combination_scale (comb, int_cst_value (cst));
2835 return;
2837 case NEGATE_EXPR:
2838 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2839 aff_combination_scale (comb, -1);
2840 return;
2842 case ADDR_EXPR:
2843 core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2844 &toffset, &mode, &unsignedp, &volatilep,
2845 false);
2846 if (bitpos % BITS_PER_UNIT != 0)
2847 break;
2848 aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2849 core = build_fold_addr_expr (core);
2850 if (TREE_CODE (core) == ADDR_EXPR)
2851 aff_combination_add_elt (comb, core, 1);
2852 else
2854 tree_to_aff_combination (core, type, &tmp);
2855 aff_combination_add (comb, &tmp);
2857 if (toffset)
2859 tree_to_aff_combination (toffset, type, &tmp);
2860 aff_combination_add (comb, &tmp);
2862 return;
2864 default:
2865 break;
2868 aff_combination_elt (comb, type, expr);
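/* For instance, 4 * i + (j - 2) decomposes into offset = -2 (taken
   modulo the mask), elts = {i, j} with coefs = {4, 1}; an ADDR_EXPR
   additionally folds the constant byte offset of the referenced
   component into the offset part.  */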
2871 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for width of TYPE. */
2873 static tree
2874 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2875 unsigned HOST_WIDE_INT mask)
2877 enum tree_code code;
2879 scale &= mask;
2880 elt = fold_convert (type, elt);
2882 if (scale == 1)
2884 if (!expr)
2885 return elt;
2887 return fold_build2 (PLUS_EXPR, type, expr, elt);
2890 if (scale == mask)
2892 if (!expr)
2893 return fold_build1 (NEGATE_EXPR, type, elt);
2895 return fold_build2 (MINUS_EXPR, type, expr, elt);
2898 if (!expr)
2899 return fold_build2 (MULT_EXPR, type, elt,
2900 build_int_cst_type (type, scale));
2902 if ((scale | (mask >> 1)) == mask)
2904 /* Scale is negative. */
2905 code = MINUS_EXPR;
2906 scale = (-scale) & mask;
2908 else
2909 code = PLUS_EXPR;
2911 elt = fold_build2 (MULT_EXPR, type, elt,
2912 build_int_cst_type (type, scale));
2913 return fold_build2 (code, type, expr, elt);
2916 /* Copies the tree elements of COMB to ensure that they are not shared. */
2918 static void
2919 unshare_aff_combination (struct affine_tree_combination *comb)
2921 unsigned i;
2923 for (i = 0; i < comb->n; i++)
2924 comb->elts[i] = unshare_expr (comb->elts[i]);
2925 if (comb->rest)
2926 comb->rest = unshare_expr (comb->rest);
2929 /* Makes tree from the affine combination COMB. */
2931 static tree
2932 aff_combination_to_tree (struct affine_tree_combination *comb)
2934 tree type = comb->type;
2935 tree expr = comb->rest;
2936 unsigned i;
2937 unsigned HOST_WIDE_INT off, sgn;
2939 if (comb->n == 0 && comb->offset == 0)
2941 if (expr)
2943 /* Handle the special case produced by get_computation_aff when
2944 the type does not fit in HOST_WIDE_INT. */
2945 return fold_convert (type, expr);
2947 else
2948 return build_int_cst (type, 0);
2951 gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2953 for (i = 0; i < comb->n; i++)
2954 expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2955 comb->mask);
2957 if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2959 /* Offset is negative. */
2960 off = (-comb->offset) & comb->mask;
2961 sgn = comb->mask;
2963 else
2965 off = comb->offset;
2966 sgn = 1;
2968 return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2969 comb->mask);
2972 /* Folds EXPR using the affine expressions framework. */
2974 static tree
2975 fold_affine_expr (tree expr)
2977 tree type = TREE_TYPE (expr);
2978 struct affine_tree_combination comb;
2980 if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
2981 return expr;
2983 tree_to_aff_combination (expr, type, &comb);
2984 return aff_combination_to_tree (&comb);
2987 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2988 same precision, which is at least as wide as the precision of TYPE, stores
2989 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2990 type of A and B. */
2992 static tree
2993 determine_common_wider_type (tree *a, tree *b)
2995 tree wider_type = NULL;
2996 tree suba, subb;
2997 tree atype = TREE_TYPE (*a);
2999 if ((TREE_CODE (*a) == NOP_EXPR
3000 || TREE_CODE (*a) == CONVERT_EXPR))
3002 suba = TREE_OPERAND (*a, 0);
3003 wider_type = TREE_TYPE (suba);
3004 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3005 return atype;
3007 else
3008 return atype;
3010 if ((TREE_CODE (*b) == NOP_EXPR
3011 || TREE_CODE (*b) == CONVERT_EXPR))
3013 subb = TREE_OPERAND (*b, 0);
3014 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3015 return atype;
3017 else
3018 return atype;
3020 *a = suba;
3021 *b = subb;
3022 return wider_type;
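/* E.g. for A = (unsigned int) la and B = (unsigned int) lb, with la
   and lb of type long (64 bits versus 32), the casts are stripped,
   *A and *B are set to la and lb, and long is returned, so the
   difference can later be folded in the wider type.  */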
3025 /* Determines the expression by which USE is expressed from induction variable
3026 CAND at statement AT in LOOP. The expression is stored in a decomposed
3027 form into AFF. Returns false if USE cannot be expressed using CAND. */
3029 static bool
3030 get_computation_aff (struct loop *loop,
3031 struct iv_use *use, struct iv_cand *cand, tree at,
3032 struct affine_tree_combination *aff)
3034 tree ubase = use->iv->base;
3035 tree ustep = use->iv->step;
3036 tree cbase = cand->iv->base;
3037 tree cstep = cand->iv->step;
3038 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3039 tree common_type;
3040 tree uutype;
3041 tree expr, delta;
3042 tree ratio;
3043 unsigned HOST_WIDE_INT ustepi, cstepi;
3044 HOST_WIDE_INT ratioi;
3045 struct affine_tree_combination cbase_aff, expr_aff;
3046 tree cstep_orig = cstep, ustep_orig = ustep;
3047 double_int rat;
3049 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3051 /* We do not have enough precision to express the values of the use. */
3052 return false;
3055 expr = var_at_stmt (loop, cand, at);
3057 if (TREE_TYPE (expr) != ctype)
3059 /* This may happen with the original ivs. */
3060 expr = fold_convert (ctype, expr);
3063 if (TYPE_UNSIGNED (utype))
3064 uutype = utype;
3065 else
3067 uutype = unsigned_type_for (utype);
3068 ubase = fold_convert (uutype, ubase);
3069 ustep = fold_convert (uutype, ustep);
3072 if (uutype != ctype)
3074 expr = fold_convert (uutype, expr);
3075 cbase = fold_convert (uutype, cbase);
3076 cstep = fold_convert (uutype, cstep);
3078 /* If the conversion is not noop, we must take it into account when
3079 considering the value of the step. */
3080 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3081 cstep_orig = cstep;
3084 if (cst_and_fits_in_hwi (cstep_orig)
3085 && cst_and_fits_in_hwi (ustep_orig))
3087 ustepi = int_cst_value (ustep_orig);
3088 cstepi = int_cst_value (cstep_orig);
3090 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3092 /* TODO maybe consider the case when ustep divides cstep and the ratio is
3093 a power of 2 (so that the division is fast to execute)? We would
3094 need to be much more careful with overflows etc. then. */
3095 return false;
3098 ratio = build_int_cst_type (uutype, ratioi);
3100 else
3102 if (!constant_multiple_of (ustep_orig, cstep_orig, &rat))
3103 return false;
3104 ratio = double_int_to_tree (uutype, rat);
3106 /* Ratioi is only used to detect special cases when the multiplicative
3107 factor is 1 or -1, so if rat does not fit into a HOST_WIDE_INT, we may
3108 set it to 0. */
3109 if (double_int_fits_in_shwi_p (rat))
3110 ratioi = double_int_to_shwi (rat);
3111 else
3112 ratioi = 0;
3115 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3116 type, we achieve better folding by computing their difference in this
3117 wider type, and cast the result to UUTYPE. We do not need to worry about
3118 overflows, as all the arithmetic will in the end be performed in UUTYPE
3119 anyway. */
3120 common_type = determine_common_wider_type (&ubase, &cbase);
3122 /* We may need to shift the value if we are after the increment. */
3123 if (stmt_after_increment (loop, cand, at))
3125 if (uutype != common_type)
3126 cstep = fold_convert (common_type, cstep);
3127 cbase = fold_build2 (PLUS_EXPR, common_type, cbase, cstep);
3130 /* use = ubase - ratio * cbase + ratio * var.
3132 In the general case ubase + ratio * (var - cbase) could be better (one less
3133 multiplication), but often it is possible to eliminate redundant parts
3134 of computations from (ubase - ratio * cbase) term, and if it does not
3135 happen, fold is able to apply the distributive law to obtain this form
3136 anyway. */
3138 if (TYPE_PRECISION (common_type) > HOST_BITS_PER_WIDE_INT)
3140 /* Let's compute in trees and just return the result in AFF. This case
3141 should not be very common, and fold itself is not that bad either,
3142 so making the aff. functions more complicated to handle this case
3143 is not that urgent. */
3144 if (ratioi == 1)
3146 delta = fold_build2 (MINUS_EXPR, common_type, ubase, cbase);
3147 if (uutype != common_type)
3148 delta = fold_convert (uutype, delta);
3149 expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3151 else if (ratioi == -1)
3153 delta = fold_build2 (PLUS_EXPR, common_type, ubase, cbase);
3154 if (uutype != common_type)
3155 delta = fold_convert (uutype, delta);
3156 expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3158 else
3160 delta = fold_build2 (MULT_EXPR, common_type, cbase, ratio);
3161 delta = fold_build2 (MINUS_EXPR, common_type, ubase, delta);
3162 if (uutype != common_type)
3163 delta = fold_convert (uutype, delta);
3164 expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3165 expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3168 aff->type = uutype;
3169 aff->n = 0;
3170 aff->offset = 0;
3171 aff->mask = 0;
3172 aff->rest = expr;
3173 return true;
3176 /* If we got here, the type fits in HOST_WIDE_INT, thus it must be
3177 possible to compute ratioi. */
3178 gcc_assert (ratioi);
3180 tree_to_aff_combination (ubase, common_type, aff);
3181 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3182 tree_to_aff_combination (expr, uutype, &expr_aff);
3183 aff_combination_scale (&cbase_aff, -ratioi);
3184 aff_combination_scale (&expr_aff, ratioi);
3185 aff_combination_add (aff, &cbase_aff);
3186 if (common_type != uutype)
3187 aff_combination_convert (uutype, aff);
3188 aff_combination_add (aff, &expr_aff);
3190 return true;
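/* As an example, suppose the use walks an int array by address with
   iv {base = &a[0], step = 4}, and CAND is the counter
   {base = 0, step = 1}. Then ratio = 4 and the affine result encodes
   &a[0] + 4 * var, i.e. ubase - ratio * cbase + ratio * var with
   cbase = 0.  */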
3193 /* Determines the expression by which USE is expressed from induction variable
3194 CAND at statement AT in LOOP. The computation is unshared. */
3196 static tree
3197 get_computation_at (struct loop *loop,
3198 struct iv_use *use, struct iv_cand *cand, tree at)
3200 struct affine_tree_combination aff;
3201 tree type = TREE_TYPE (use->iv->base);
3203 if (!get_computation_aff (loop, use, cand, at, &aff))
3204 return NULL_TREE;
3205 unshare_aff_combination (&aff);
3206 return fold_convert (type, aff_combination_to_tree (&aff));
3209 /* Determines the expression by which USE is expressed from induction variable
3210 CAND in LOOP. The computation is unshared. */
3212 static tree
3213 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3215 return get_computation_at (loop, use, cand, use->stmt);
3218 /* Returns cost of addition in MODE. */
3220 static unsigned
3221 add_cost (enum machine_mode mode)
3223 static unsigned costs[NUM_MACHINE_MODES];
3224 rtx seq;
3225 unsigned cost;
3227 if (costs[mode])
3228 return costs[mode];
3230 start_sequence ();
3231 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3232 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3233 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3234 NULL_RTX);
3235 seq = get_insns ();
3236 end_sequence ();
3238 cost = seq_cost (seq);
3239 if (!cost)
3240 cost = 1;
3242 costs[mode] = cost;
3244 if (dump_file && (dump_flags & TDF_DETAILS))
3245 fprintf (dump_file, "Addition in %s costs %d\n",
3246 GET_MODE_NAME (mode), cost);
3247 return cost;
3250 /* Entry in a hashtable of already known costs for multiplication. */
3251 struct mbc_entry
3253 HOST_WIDE_INT cst; /* The constant to multiply by. */
3254 enum machine_mode mode; /* In mode. */
3255 unsigned cost; /* The cost. */
3258 /* Computes the hash value for ENTRY. */
3260 static hashval_t
3261 mbc_entry_hash (const void *entry)
3263 const struct mbc_entry *e = entry;
3265 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3268 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3270 static int
3271 mbc_entry_eq (const void *entry1, const void *entry2)
3273 const struct mbc_entry *e1 = entry1;
3274 const struct mbc_entry *e2 = entry2;
3276 return (e1->mode == e2->mode
3277 && e1->cst == e2->cst);
3280 /* Returns cost of multiplication by constant CST in MODE. */
3282 unsigned
3283 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3285 static htab_t costs;
3286 struct mbc_entry **cached, act;
3287 rtx seq;
3288 unsigned cost;
3290 if (!costs)
3291 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3293 act.mode = mode;
3294 act.cst = cst;
3295 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3296 if (*cached)
3297 return (*cached)->cost;
3299 *cached = XNEW (struct mbc_entry);
3300 (*cached)->mode = mode;
3301 (*cached)->cst = cst;
3303 start_sequence ();
3304 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3305 gen_int_mode (cst, mode), NULL_RTX, 0);
3306 seq = get_insns ();
3307 end_sequence ();
3309 cost = seq_cost (seq);
3311 if (dump_file && (dump_flags & TDF_DETAILS))
3312 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3313 (int) cst, GET_MODE_NAME (mode), cost);
3315 (*cached)->cost = cost;
3317 return cost;
3320 /* Returns true if multiplying by RATIO is allowed in an address. */
3322 bool
3323 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3325 #define MAX_RATIO 128
3326 static sbitmap valid_mult;
3328 if (!valid_mult)
3330 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3331 rtx addr;
3332 HOST_WIDE_INT i;
3334 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3335 sbitmap_zero (valid_mult);
3336 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3337 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3339 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3340 if (memory_address_p (Pmode, addr))
3341 SET_BIT (valid_mult, i + MAX_RATIO);
3344 if (dump_file && (dump_flags & TDF_DETAILS))
3346 fprintf (dump_file, " allowed multipliers:");
3347 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3348 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3349 fprintf (dump_file, " %d", (int) i);
3350 fprintf (dump_file, "\n");
3351 fprintf (dump_file, "\n");
3355 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3356 return false;
3358 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
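/* The probe simply asks the backend, via memory_address_p, about every
   ratio in [-MAX_RATIO, MAX_RATIO]; on i386-like targets this ends up
   allowing the scales 1, 2, 4 and 8 that the addressing modes
   support.  */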
3361 /* Returns the cost of an address of the form symbol + var + OFFSET + RATIO * index.
3362 If SYMBOL_PRESENT is false, the symbol is omitted. If VAR_PRESENT is false,
3363 the variable is omitted. The addresses are computed in Pmode.
3365 TODO -- there must be some better way. This all is quite crude. */
3367 static unsigned
3368 get_address_cost (bool symbol_present, bool var_present,
3369 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3371 static bool initialized = false;
3372 static HOST_WIDE_INT rat, off;
3373 static HOST_WIDE_INT min_offset, max_offset;
3374 static unsigned costs[2][2][2][2];
3375 unsigned cost, acost;
3376 bool offset_p, ratio_p;
3377 HOST_WIDE_INT s_offset;
3378 unsigned HOST_WIDE_INT mask;
3379 unsigned bits;
3381 if (!initialized)
3383 HOST_WIDE_INT i;
3384 int old_cse_not_expected;
3385 unsigned sym_p, var_p, off_p, rat_p, add_c;
3386 rtx seq, addr, base;
3387 rtx reg0, reg1;
3389 initialized = true;
3391 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3393 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3394 for (i = 1; i <= 1 << 20; i <<= 1)
3396 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3397 if (!memory_address_p (Pmode, addr))
3398 break;
3400 max_offset = i >> 1;
3401 off = max_offset;
3403 for (i = 1; i <= 1 << 20; i <<= 1)
3405 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3406 if (!memory_address_p (Pmode, addr))
3407 break;
3409 min_offset = -(i >> 1);
3411 if (dump_file && (dump_flags & TDF_DETAILS))
3413 fprintf (dump_file, "get_address_cost:\n");
3414 fprintf (dump_file, " min offset %d\n", (int) min_offset);
3415 fprintf (dump_file, " max offset %d\n", (int) max_offset);
3418 rat = 1;
3419 for (i = 2; i <= MAX_RATIO; i++)
3420 if (multiplier_allowed_in_address_p (i))
3422 rat = i;
3423 break;
3426 /* Compute the cost of various addressing modes. */
3427 acost = 0;
3428 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3429 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3431 for (i = 0; i < 16; i++)
3433 sym_p = i & 1;
3434 var_p = (i >> 1) & 1;
3435 off_p = (i >> 2) & 1;
3436 rat_p = (i >> 3) & 1;
3438 addr = reg0;
3439 if (rat_p)
3440 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3442 if (var_p)
3443 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3445 if (sym_p)
3447 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3448 if (off_p)
3449 base = gen_rtx_fmt_e (CONST, Pmode,
3450 gen_rtx_fmt_ee (PLUS, Pmode,
3451 base,
3452 gen_int_mode (off, Pmode)));
3454 else if (off_p)
3455 base = gen_int_mode (off, Pmode);
3456 else
3457 base = NULL_RTX;
3459 if (base)
3460 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3462 start_sequence ();
3463 /* To avoid splitting addressing modes, pretend that no cse will
3464 follow. */
3465 old_cse_not_expected = cse_not_expected;
3466 cse_not_expected = true;
3467 addr = memory_address (Pmode, addr);
3468 cse_not_expected = old_cse_not_expected;
3469 seq = get_insns ();
3470 end_sequence ();
3472 acost = seq_cost (seq);
3473 acost += address_cost (addr, Pmode);
3475 if (!acost)
3476 acost = 1;
3477 costs[sym_p][var_p][off_p][rat_p] = acost;
3480 /* On some targets, it is quite expensive to load a symbol into a register,
3481 which makes addresses that contain symbols look much more expensive.
3482 However, the symbol will have to be loaded in any case before the
3483 loop (and quite likely we have it in a register already), so it does
3484 not make much sense to penalize them too heavily. So make some final
3485 tweaks for the SYMBOL_PRESENT modes:
3487 If VAR_PRESENT is false, and the mode obtained by changing the symbol
3488 to a var is cheaper, use that mode with a small penalty.
3489 If VAR_PRESENT is true, test whether the mode with
3490 SYMBOL_PRESENT = false is cheaper even with the cost of the addition,
3491 and if this is the case, use it. */
3492 add_c = add_cost (Pmode);
3493 for (i = 0; i < 8; i++)
3495 var_p = i & 1;
3496 off_p = (i >> 1) & 1;
3497 rat_p = (i >> 2) & 1;
3499 acost = costs[0][1][off_p][rat_p] + 1;
3500 if (var_p)
3501 acost += add_c;
3503 if (acost < costs[1][var_p][off_p][rat_p])
3504 costs[1][var_p][off_p][rat_p] = acost;
3507 if (dump_file && (dump_flags & TDF_DETAILS))
3509 fprintf (dump_file, "Address costs:\n");
3511 for (i = 0; i < 16; i++)
3513 sym_p = i & 1;
3514 var_p = (i >> 1) & 1;
3515 off_p = (i >> 2) & 1;
3516 rat_p = (i >> 3) & 1;
3518 fprintf (dump_file, " ");
3519 if (sym_p)
3520 fprintf (dump_file, "sym + ");
3521 if (var_p)
3522 fprintf (dump_file, "var + ");
3523 if (off_p)
3524 fprintf (dump_file, "cst + ");
3525 if (rat_p)
3526 fprintf (dump_file, "rat * ");
3528 acost = costs[sym_p][var_p][off_p][rat_p];
3529 fprintf (dump_file, "index costs %d\n", acost);
3531 fprintf (dump_file, "\n");
3535 bits = GET_MODE_BITSIZE (Pmode);
3536 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3537 offset &= mask;
3538 if ((offset >> (bits - 1) & 1))
3539 offset |= ~mask;
3540 s_offset = offset;
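/* The masking above sign-extends OFFSET from the width of Pmode to a
   HOST_WIDE_INT (the mask itself is built with a double shift to stay
   defined for full-width Pmode); e.g. with 32-bit pointers and a
   64-bit HOST_WIDE_INT, an offset of 0xfffffff0 becomes
   s_offset == -16, which can then be compared against min_offset and
   max_offset.  */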
3542 cost = 0;
3543 offset_p = (s_offset != 0
3544 && min_offset <= s_offset && s_offset <= max_offset);
3545 ratio_p = (ratio != 1
3546 && multiplier_allowed_in_address_p (ratio));
3548 if (ratio != 1 && !ratio_p)
3549 cost += multiply_by_cost (ratio, Pmode);
3551 if (s_offset && !offset_p && !symbol_present)
3553 cost += add_cost (Pmode);
3554 var_present = true;
3557 acost = costs[symbol_present][var_present][offset_p][ratio_p];
3558 return cost + acost;
3561 /* Estimates cost of forcing expression EXPR into a variable. */
3563 unsigned
3564 force_expr_to_var_cost (tree expr)
3566 static bool costs_initialized = false;
3567 static unsigned integer_cost;
3568 static unsigned symbol_cost;
3569 static unsigned address_cost;
3570 tree op0, op1;
3571 unsigned cost0, cost1, cost;
3572 enum machine_mode mode;
3574 if (!costs_initialized)
3576 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3577 rtx x = gen_rtx_MEM (DECL_MODE (var),
3578 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3579 tree addr;
3580 tree type = build_pointer_type (integer_type_node);
3582 integer_cost = computation_cost (build_int_cst (integer_type_node,
3583 2000));
3585 SET_DECL_RTL (var, x);
3586 TREE_STATIC (var) = 1;
3587 addr = build1 (ADDR_EXPR, type, var);
3588 symbol_cost = computation_cost (addr) + 1;
3590 address_cost
3591 = computation_cost (build2 (PLUS_EXPR, type,
3592 addr,
3593 build_int_cst (type, 2000))) + 1;
3594 if (dump_file && (dump_flags & TDF_DETAILS))
3596 fprintf (dump_file, "force_expr_to_var_cost:\n");
3597 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3598 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3599 fprintf (dump_file, " address %d\n", (int) address_cost);
3600 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3601 fprintf (dump_file, "\n");
3604 costs_initialized = true;
3607 STRIP_NOPS (expr);
3609 if (SSA_VAR_P (expr))
3610 return 0;
3612 if (TREE_INVARIANT (expr))
3614 if (TREE_CODE (expr) == INTEGER_CST)
3615 return integer_cost;
3617 if (TREE_CODE (expr) == ADDR_EXPR)
3619 tree obj = TREE_OPERAND (expr, 0);
3621 if (TREE_CODE (obj) == VAR_DECL
3622 || TREE_CODE (obj) == PARM_DECL
3623 || TREE_CODE (obj) == RESULT_DECL)
3624 return symbol_cost;
3627 return address_cost;
3630 switch (TREE_CODE (expr))
3632 case PLUS_EXPR:
3633 case MINUS_EXPR:
3634 case MULT_EXPR:
3635 op0 = TREE_OPERAND (expr, 0);
3636 op1 = TREE_OPERAND (expr, 1);
3637 STRIP_NOPS (op0);
3638 STRIP_NOPS (op1);
3640 if (is_gimple_val (op0))
3641 cost0 = 0;
3642 else
3643 cost0 = force_expr_to_var_cost (op0);
3645 if (is_gimple_val (op1))
3646 cost1 = 0;
3647 else
3648 cost1 = force_expr_to_var_cost (op1);
3650 break;
3652 default:
3653 /* Just an arbitrary value, FIXME. */
3654 return target_spill_cost;
3657 mode = TYPE_MODE (TREE_TYPE (expr));
3658 switch (TREE_CODE (expr))
3660 case PLUS_EXPR:
3661 case MINUS_EXPR:
3662 cost = add_cost (mode);
3663 break;
3665 case MULT_EXPR:
3666 if (cst_and_fits_in_hwi (op0))
3667 cost = multiply_by_cost (int_cst_value (op0), mode);
3668 else if (cst_and_fits_in_hwi (op1))
3669 cost = multiply_by_cost (int_cst_value (op1), mode);
3670 else
3671 return target_spill_cost;
3672 break;
3674 default:
3675 gcc_unreachable ();
3678 cost += cost0;
3679 cost += cost1;
3681 /* Bound the cost by target_spill_cost. The parts of complicated
3682 computations often are either loop invariant or at least can
3683 be shared between several iv uses, so letting this grow without
3684 limits would not give reasonable results. */
3685 return cost < target_spill_cost ? cost : target_spill_cost;
3688 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3689 invariants the computation depends on. */
3691 static unsigned
3692 force_var_cost (struct ivopts_data *data,
3693 tree expr, bitmap *depends_on)
3695 if (depends_on)
3697 fd_ivopts_data = data;
3698 walk_tree (&expr, find_depends, depends_on, NULL);
3701 return force_expr_to_var_cost (expr);
3704 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3705 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3706 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3707 invariants the computation depends on. */
3709 static unsigned
3710 split_address_cost (struct ivopts_data *data,
3711 tree addr, bool *symbol_present, bool *var_present,
3712 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3714 tree core;
3715 HOST_WIDE_INT bitsize;
3716 HOST_WIDE_INT bitpos;
3717 tree toffset;
3718 enum machine_mode mode;
3719 int unsignedp, volatilep;
3721 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3722 &unsignedp, &volatilep, false);
3724 if (toffset != 0
3725 || bitpos % BITS_PER_UNIT != 0
3726 || TREE_CODE (core) != VAR_DECL)
3728 *symbol_present = false;
3729 *var_present = true;
3730 fd_ivopts_data = data;
3731 walk_tree (&addr, find_depends, depends_on, NULL);
3732 return target_spill_cost;
3735 *offset += bitpos / BITS_PER_UNIT;
3736 if (TREE_STATIC (core)
3737 || DECL_EXTERNAL (core))
3739 *symbol_present = true;
3740 *var_present = false;
3741 return 0;
3744 *symbol_present = false;
3745 *var_present = true;
3746 return 0;
3749 /* Estimates cost of expressing difference of addresses E1 - E2 as
3750 var + symbol + offset. The value of offset is added to OFFSET,
3751 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3752 part is missing. DEPENDS_ON is a set of the invariants the computation
3753 depends on. */
3755 static unsigned
3756 ptr_difference_cost (struct ivopts_data *data,
3757 tree e1, tree e2, bool *symbol_present, bool *var_present,
3758 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3760 HOST_WIDE_INT diff = 0;
3761 unsigned cost;
3763 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3765 if (ptr_difference_const (e1, e2, &diff))
3767 *offset += diff;
3768 *symbol_present = false;
3769 *var_present = false;
3770 return 0;
3773 if (e2 == integer_zero_node)
3774 return split_address_cost (data, TREE_OPERAND (e1, 0),
3775 symbol_present, var_present, offset, depends_on);
3777 *symbol_present = false;
3778 *var_present = true;
3780 cost = force_var_cost (data, e1, depends_on);
3781 cost += force_var_cost (data, e2, depends_on);
3782 cost += add_cost (Pmode);
3784 return cost;
3787 /* Estimates cost of expressing difference E1 - E2 as
3788 var + symbol + offset. The value of offset is added to OFFSET,
3789 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3790 part is missing. DEPENDS_ON is a set of the invariants the computation
3791 depends on. */
3793 static unsigned
3794 difference_cost (struct ivopts_data *data,
3795 tree e1, tree e2, bool *symbol_present, bool *var_present,
3796 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3798 unsigned cost;
3799 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3800 unsigned HOST_WIDE_INT off1, off2;
3802 e1 = strip_offset (e1, &off1);
3803 e2 = strip_offset (e2, &off2);
3804 *offset += off1 - off2;
3806 STRIP_NOPS (e1);
3807 STRIP_NOPS (e2);
3809 if (TREE_CODE (e1) == ADDR_EXPR)
3810 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3811 depends_on);
3812 *symbol_present = false;
3814 if (operand_equal_p (e1, e2, 0))
3816 *var_present = false;
3817 return 0;
3819 *var_present = true;
3820 if (zero_p (e2))
3821 return force_var_cost (data, e1, depends_on);
3823 if (zero_p (e1))
3825 cost = force_var_cost (data, e2, depends_on);
3826 cost += multiply_by_cost (-1, mode);
3828 return cost;
3831 cost = force_var_cost (data, e1, depends_on);
3832 cost += force_var_cost (data, e2, depends_on);
3833 cost += add_cost (mode);
3835 return cost;
3838 /* Determines the cost of the computation by which USE is expressed
3839 from induction variable CAND. If ADDRESS_P is true, we just need
3840 to create an address from it, otherwise we want to get it into a
3841 register. A set of invariants we depend on is stored in
3842 DEPENDS_ON. AT is the statement at which the value is computed. */
3844 static unsigned
3845 get_computation_cost_at (struct ivopts_data *data,
3846 struct iv_use *use, struct iv_cand *cand,
3847 bool address_p, bitmap *depends_on, tree at)
3849 tree ubase = use->iv->base, ustep = use->iv->step;
3850 tree cbase, cstep;
3851 tree utype = TREE_TYPE (ubase), ctype;
3852 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3853 HOST_WIDE_INT ratio, aratio;
3854 bool var_present, symbol_present;
3855 unsigned cost = 0, n_sums;
3857 *depends_on = NULL;
3859 /* Only consider real candidates. */
3860 if (!cand->iv)
3861 return INFTY;
3863 cbase = cand->iv->base;
3864 cstep = cand->iv->step;
3865 ctype = TREE_TYPE (cbase);
3867 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3869 /* We do not have enough precision to express the values of the use. */
3870 return INFTY;
3873 if (address_p)
3875 /* Do not try to express the address of an object with a computation
3876 based on the address of a different object. This may cause problems
3877 in rtl level alias analysis (which does not expect this to happen,
3878 as it is illegal in C), and would be unlikely to be useful
3879 anyway. */
3880 if (use->iv->base_object
3881 && cand->iv->base_object
3882 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3883 return INFTY;
3886 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3888 /* TODO -- add direct handling of this case. */
3889 goto fallback;
3892 /* CSTEPI is removed from the offset in case the statement is after the
3893 increment. If the step is not constant, we use zero instead.
3894 This is a bit imprecise (there is the extra addition), but
3895 redundancy elimination is likely to transform the code so that
3896 it uses the value of the variable before the increment anyway,
3897 so this is not too unrealistic. */
3898 if (cst_and_fits_in_hwi (cstep))
3899 cstepi = int_cst_value (cstep);
3900 else
3901 cstepi = 0;
3903 if (cst_and_fits_in_hwi (ustep)
3904 && cst_and_fits_in_hwi (cstep))
3906 ustepi = int_cst_value (ustep);
3908 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3909 return INFTY;
3911 else
3913 double_int rat;
3915 if (!constant_multiple_of (ustep, cstep, &rat))
3916 return INFTY;
3918 if (double_int_fits_in_shwi_p (rat))
3919 ratio = double_int_to_shwi (rat);
3920 else
3921 return INFTY;
3924 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3925 or ratio == 1, it is better to handle this like
3927 ubase - ratio * cbase + ratio * var
3929 (this also holds in the case ratio == -1, TODO). */
3931 if (cst_and_fits_in_hwi (cbase))
3933 offset = - ratio * int_cst_value (cbase);
3934 cost += difference_cost (data,
3935 ubase, integer_zero_node,
3936 &symbol_present, &var_present, &offset,
3937 depends_on);
3939 else if (ratio == 1)
3941 cost += difference_cost (data,
3942 ubase, cbase,
3943 &symbol_present, &var_present, &offset,
3944 depends_on);
3946 else
3948 cost += force_var_cost (data, cbase, depends_on);
3949 cost += add_cost (TYPE_MODE (ctype));
3950 cost += difference_cost (data,
3951 ubase, integer_zero_node,
3952 &symbol_present, &var_present, &offset,
3953 depends_on);
3956 /* If we are after the increment, the value of the candidate is higher by
3957 one iteration. */
3958 if (stmt_after_increment (data->current_loop, cand, at))
3959 offset -= ratio * cstepi;
3961 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3962 (symbol/var/const parts may be omitted). If we are looking for an address,
3963 find the cost of addressing this. */
3964 if (address_p)
3965 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3967 /* Otherwise estimate the costs for computing the expression. */
3968 aratio = ratio > 0 ? ratio : -ratio;
3969 if (!symbol_present && !var_present && !offset)
3971 if (ratio != 1)
3972 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3974 return cost;
3977 if (aratio != 1)
3978 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3980 n_sums = 1;
3981 if (var_present
3982 /* Symbol + offset should be compile-time computable. */
3983 && (symbol_present || offset))
3984 n_sums++;
3986 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3988 fallback:
3990 /* Just get the expression, expand it and measure the cost. */
3991 tree comp = get_computation_at (data->current_loop, use, cand, at);
3993 if (!comp)
3994 return INFTY;
3996 if (address_p)
3997 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3999 return computation_cost (comp);
4003 /* Determines the cost of the computation by which USE is expressed
4004 from induction variable CAND. If ADDRESS_P is true, we just need
4005 to create an address from it, otherwise we want to get it into a
4006 register. A set of invariants we depend on is stored in
4007 DEPENDS_ON. */
4009 static unsigned
4010 get_computation_cost (struct ivopts_data *data,
4011 struct iv_use *use, struct iv_cand *cand,
4012 bool address_p, bitmap *depends_on)
4014 return get_computation_cost_at (data,
4015 use, cand, address_p, depends_on, use->stmt);
4018 /* Determines cost of basing replacement of USE on CAND in a generic
4019 expression. */
4021 static bool
4022 determine_use_iv_cost_generic (struct ivopts_data *data,
4023 struct iv_use *use, struct iv_cand *cand)
4025 bitmap depends_on;
4026 unsigned cost;
4028 /* The simple case first -- if we need to express value of the preserved
4029 original biv, the cost is 0. This also prevents us from counting the
4030 cost of increment twice -- once at this use and once in the cost of
4031 the candidate. */
4032 if (cand->pos == IP_ORIGINAL
4033 && cand->incremented_at == use->stmt)
4035 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4036 return true;
4039 cost = get_computation_cost (data, use, cand, false, &depends_on);
4040 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4042 return cost != INFTY;
4045 /* Determines cost of basing replacement of USE on CAND in an address. */
4047 static bool
4048 determine_use_iv_cost_address (struct ivopts_data *data,
4049 struct iv_use *use, struct iv_cand *cand)
4051 bitmap depends_on;
4052 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
4054 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4056 return cost != INFTY;
4059 /* Computes value of induction variable IV in iteration NITER. */
4061 static tree
4062 iv_value (struct iv *iv, tree niter)
4064 tree val;
4065 tree type = TREE_TYPE (iv->base);
4067 niter = fold_convert (type, niter);
4068 val = fold_build2 (MULT_EXPR, type, iv->step, niter);
4070 return fold_build2 (PLUS_EXPR, type, iv->base, val);
4073 /* Computes value of candidate CAND at position AT in iteration NITER. */
4075 static tree
4076 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
4078 tree val = iv_value (cand->iv, niter);
4079 tree type = TREE_TYPE (cand->iv->base);
4081 if (stmt_after_increment (loop, cand, at))
4082 val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
4084 return val;
4087 /* Returns the period of induction variable IV. */
4089 static tree
4090 iv_period (struct iv *iv)
4092 tree step = iv->step, period, type;
4093 tree pow2div;
4095 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4097 /* The period of the iv is the type range divided by gcd (step, type range).
4098 Since the type range is a power of two, it suffices to determine the
4099 maximum power of two that divides the step. */
4100 pow2div = num_ending_zeros (step);
4101 type = unsigned_type_for (TREE_TYPE (step));
4103 period = build_low_bits_mask (type,
4104 (TYPE_PRECISION (type)
4105 - tree_low_cst (pow2div, 1)));
4107 return period;
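/* E.g. a 32-bit unsigned iv with step 4 takes 2^30 distinct values;
   num_ending_zeros is 2 and the returned period is 2^30 - 1, the
   largest number of increments for which the iv provably does not
   revisit a value.  */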
4110 /* Returns the comparison operator used when eliminating the iv USE. */
4112 static enum tree_code
4113 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4115 struct loop *loop = data->current_loop;
4116 basic_block ex_bb;
4117 edge exit;
4119 ex_bb = bb_for_stmt (use->stmt);
4120 exit = EDGE_SUCC (ex_bb, 0);
4121 if (flow_bb_inside_loop_p (loop, exit->dest))
4122 exit = EDGE_SUCC (ex_bb, 1);
4124 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
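/* I.e. if the loop is left when the condition is true, the condition
   may be replaced by VAR == BOUND (we exit once the candidate reaches
   the bound); if the loop is left on the false edge, by
   VAR != BOUND.  */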
4127 /* Check whether it is possible to express the condition in USE by comparison
4128 of candidate CAND. If so, store the value it is compared with to BOUND. */
4130 static bool
4131 may_eliminate_iv (struct ivopts_data *data,
4132 struct iv_use *use, struct iv_cand *cand, tree *bound)
4134 basic_block ex_bb;
4135 edge exit;
4136 tree nit, nit_type;
4137 tree wider_type, period, per_type;
4138 struct loop *loop = data->current_loop;
4140 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4141 return false;
4143 /* For now this works only for exits that dominate the loop latch. TODO -- extend
4144 for other conditions inside loop body. */
4145 ex_bb = bb_for_stmt (use->stmt);
4146 if (use->stmt != last_stmt (ex_bb)
4147 || TREE_CODE (use->stmt) != COND_EXPR)
4148 return false;
4149 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4150 return false;
4152 exit = EDGE_SUCC (ex_bb, 0);
4153 if (flow_bb_inside_loop_p (loop, exit->dest))
4154 exit = EDGE_SUCC (ex_bb, 1);
4155 if (flow_bb_inside_loop_p (loop, exit->dest))
4156 return false;
4158 nit = niter_for_exit (data, exit);
4159 if (!nit)
4160 return false;
4162 nit_type = TREE_TYPE (nit);
4164 /* Determine whether we may use the variable to test whether niter iterations
4165 elapsed. This is the case iff the period of the induction variable is
4166 at least the number of iterations. */
4167 period = iv_period (cand->iv);
4168 if (!period)
4169 return false;
4170 per_type = TREE_TYPE (period);
4172 wider_type = TREE_TYPE (period);
4173 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4174 wider_type = per_type;
4175 else
4176 wider_type = nit_type;
4178 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4179 fold_convert (wider_type, period),
4180 fold_convert (wider_type, nit))))
4181 return false;
4183 *bound = fold_affine_expr (cand_value_at (loop, cand, use->stmt, nit));
4184 return true;
4187 /* Determines cost of basing replacement of USE on CAND in a condition. */
4189 static bool
4190 determine_use_iv_cost_condition (struct ivopts_data *data,
4191 struct iv_use *use, struct iv_cand *cand)
4193 tree bound = NULL_TREE, op, cond;
4194 bitmap depends_on = NULL;
4195 unsigned cost;
4197 /* Only consider real candidates. */
4198 if (!cand->iv)
4200 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4201 return false;
4204 if (may_eliminate_iv (data, use, cand, &bound))
4206 cost = force_var_cost (data, bound, &depends_on);
4208 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4209 return cost != INFTY;
4212 /* The induction variable elimination failed; just express the original
4213 giv. If it is compared with an invariant, note that we cannot get
4214 rid of it. */
4215 cost = get_computation_cost (data, use, cand, false, &depends_on);
4217 cond = *use->op_p;
4218 if (TREE_CODE (cond) != SSA_NAME)
4220 op = TREE_OPERAND (cond, 0);
4221 if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4222 op = TREE_OPERAND (cond, 1);
4223 if (TREE_CODE (op) == SSA_NAME)
4225 op = get_iv (data, op)->base;
4226 fd_ivopts_data = data;
4227 walk_tree (&op, find_depends, &depends_on, NULL);
4231 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4232 return cost != INFTY;
4235 /* Determines cost of basing replacement of USE on CAND. Returns false
4236 if USE cannot be based on CAND. */
4238 static bool
4239 determine_use_iv_cost (struct ivopts_data *data,
4240 struct iv_use *use, struct iv_cand *cand)
4242 switch (use->type)
4244 case USE_NONLINEAR_EXPR:
4245 return determine_use_iv_cost_generic (data, use, cand);
4247 case USE_ADDRESS:
4248 return determine_use_iv_cost_address (data, use, cand);
4250 case USE_COMPARE:
4251 return determine_use_iv_cost_condition (data, use, cand);
4253 default:
4254 gcc_unreachable ();
4258 /* Determines costs of basing the use of the iv on an iv candidate. */
4260 static void
4261 determine_use_iv_costs (struct ivopts_data *data)
4263 unsigned i, j;
4264 struct iv_use *use;
4265 struct iv_cand *cand;
4266 bitmap to_clear = BITMAP_ALLOC (NULL);
4268 alloc_use_cost_map (data);
4270 for (i = 0; i < n_iv_uses (data); i++)
4272 use = iv_use (data, i);
4274 if (data->consider_all_candidates)
4276 for (j = 0; j < n_iv_cands (data); j++)
4278 cand = iv_cand (data, j);
4279 determine_use_iv_cost (data, use, cand);
4282 else
4284 bitmap_iterator bi;
4286 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4288 cand = iv_cand (data, j);
4289 if (!determine_use_iv_cost (data, use, cand))
4290 bitmap_set_bit (to_clear, j);
4293 /* Remove the candidates for which the cost is infinite from
4294 the list of related candidates. */
4295 bitmap_and_compl_into (use->related_cands, to_clear);
4296 bitmap_clear (to_clear);
4300 BITMAP_FREE (to_clear);
4302 if (dump_file && (dump_flags & TDF_DETAILS))
4304 fprintf (dump_file, "Use-candidate costs:\n");
4306 for (i = 0; i < n_iv_uses (data); i++)
4308 use = iv_use (data, i);
4310 fprintf (dump_file, "Use %d:\n", i);
4311 fprintf (dump_file, " cand\tcost\tdepends on\n");
4312 for (j = 0; j < use->n_map_members; j++)
4314 if (!use->cost_map[j].cand
4315 || use->cost_map[j].cost == INFTY)
4316 continue;
4318 fprintf (dump_file, " %d\t%d\t",
4319 use->cost_map[j].cand->id,
4320 use->cost_map[j].cost);
4321 if (use->cost_map[j].depends_on)
4322 bitmap_print (dump_file,
4323 use->cost_map[j].depends_on, "","");
4324 fprintf (dump_file, "\n");
4327 fprintf (dump_file, "\n");
4329 fprintf (dump_file, "\n");
4333 /* Determines cost of the candidate CAND. */
4335 static void
4336 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4338 unsigned cost_base, cost_step;
4339 tree base;
4341 if (!cand->iv)
4343 cand->cost = 0;
4344 return;
4347 /* There are two costs associated with the candidate -- its increment
4348 and its initialization. The second is almost negligible for any loop
4349 that rolls enough, so we weight it only slightly. */
4351 base = cand->iv->base;
4352 cost_base = force_var_cost (data, base, NULL);
4353 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4355 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4357 /* Prefer the original iv unless we may gain something by replacing it;
4358 this is not really relevant for artificial ivs created by other
4359 passes. */
4360 if (cand->pos == IP_ORIGINAL
4361 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4362 cand->cost--;
4364 /* Prefer not to insert statements into the latch unless there are some
4365 already (so that we do not create unnecessary jumps). */
4366 if (cand->pos == IP_END
4367 && empty_block_p (ip_end_pos (data->current_loop)))
4368 cand->cost++;
4371 /* Determines costs of computation of the candidates. */
4373 static void
4374 determine_iv_costs (struct ivopts_data *data)
4376 unsigned i;
4378 if (dump_file && (dump_flags & TDF_DETAILS))
4380 fprintf (dump_file, "Candidate costs:\n");
4381 fprintf (dump_file, " cand\tcost\n");
4384 for (i = 0; i < n_iv_cands (data); i++)
4386 struct iv_cand *cand = iv_cand (data, i);
4388 determine_iv_cost (data, cand);
4390 if (dump_file && (dump_flags & TDF_DETAILS))
4391 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4394 if (dump_file && (dump_flags & TDF_DETAILS))
4395 fprintf (dump_file, "\n");
4398 /* Calculates cost for having SIZE induction variables. */
4400 static unsigned
4401 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4403 return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
4406 /* For each size of the induction variable set, determine the penalty. */
4408 static void
4409 determine_set_costs (struct ivopts_data *data)
4411 unsigned j, n;
4412 tree phi, op;
4413 struct loop *loop = data->current_loop;
4414 bitmap_iterator bi;
4416 /* We use the following model (definitely improvable, especially the
4417 cost function -- TODO):
4419 We estimate the number of registers available (using MD data), name it A.
4421 We estimate the number of registers used by the loop, name it U. This
4422 number is obtained as the number of loop phi nodes (not counting virtual
4423 registers and bivs) + the number of variables from outside of the loop.
4425 We set a reserve R (free regs that are used for temporary computations,
4426 etc.). For now the reserve is a constant 3.
4428 Let I be the number of induction variables.
4430 -- if U + I + R <= A, the cost is I * SMALL_COST (just so that we do not
4431 encourage creating a lot of ivs without a reason).
4432 -- if A - R < U + I <= A, the cost is I * PRES_COST
4433 -- if U + I > A, the cost is I * PRES_COST and
4434 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4436 if (dump_file && (dump_flags & TDF_DETAILS))
4438 fprintf (dump_file, "Global costs:\n");
4439 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4440 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
4441 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
4442 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
4445 n = 0;
4446 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4448 op = PHI_RESULT (phi);
4450 if (!is_gimple_reg (op))
4451 continue;
4453 if (get_iv (data, op))
4454 continue;
4456 n++;
4459 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4461 struct version_info *info = ver_info (data, j);
4463 if (info->inv_id && info->has_nonlin_use)
4464 n++;
4467 data->regs_used = n;
4468 if (dump_file && (dump_flags & TDF_DETAILS))
4469 fprintf (dump_file, " regs_used %d\n", n);
4471 if (dump_file && (dump_flags & TDF_DETAILS))
4473 fprintf (dump_file, " cost for size:\n");
4474 fprintf (dump_file, " ivs\tcost\n");
4475 for (j = 0; j <= 2 * target_avail_regs; j++)
4476 fprintf (dump_file, " %d\t%d\n", j,
4477 ivopts_global_cost_for_size (data, j));
4478 fprintf (dump_file, "\n");
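/* A minimal sketch of the model above as a hypothetical, unused helper
   (illustrative names; small_cost, pres_cost and spill_cost stand for
   the target_small_cost, target_pres_cost and target_spill_cost values,
   and the reserve R is the constant 3 mentioned above).  */
#if 0
static unsigned
example_global_cost (unsigned avail, unsigned used, unsigned n_ivs,
                     unsigned n_uses, unsigned small_cost,
                     unsigned pres_cost, unsigned spill_cost)
{
  unsigned reserve = 3;

  /* U + I + R <= A: registers are plentiful.  */
  if (used + n_ivs + reserve <= avail)
    return n_ivs * small_cost;

  /* A - R < U + I <= A: we fit, but eat into the reserve.  */
  if (used + n_ivs <= avail)
    return n_ivs * pres_cost;

  /* U + I > A: some values are expected to spill.  */
  return n_ivs * pres_cost
         + n_uses * spill_cost * (used + n_ivs - avail) / (used + n_ivs);
}
#endif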
4482 /* Returns true if A is a cheaper cost pair than B. */
4484 static bool
4485 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4487 if (!a)
4488 return false;
4490 if (!b)
4491 return true;
4493 if (a->cost < b->cost)
4494 return true;
4496 if (a->cost > b->cost)
4497 return false;
4499 /* In case the costs are the same, prefer the cheaper candidate. */
4500 if (a->cand->cost < b->cand->cost)
4501 return true;
4503 return false;
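/* Note that a NULL pair is treated as infinitely expensive here, so any
   existing pair beats a missing one, and ties on the use cost fall back
   to comparing the candidates' own costs.  */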
4506 /* Computes the cost field of IVS structure. */
4508 static void
4509 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4511 unsigned cost = 0;
4513 cost += ivs->cand_use_cost;
4514 cost += ivs->cand_cost;
4515 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4517 ivs->cost = cost;
4520 /* Remove invariants in set INVS from set IVS. */
4522 static void
4523 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4525 bitmap_iterator bi;
4526 unsigned iid;
4528 if (!invs)
4529 return;
4531 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4533 ivs->n_invariant_uses[iid]--;
4534 if (ivs->n_invariant_uses[iid] == 0)
4535 ivs->n_regs--;
4539 /* Set USE not to be expressed by any candidate in IVS. */
4541 static void
4542 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4543 struct iv_use *use)
4545 unsigned uid = use->id, cid;
4546 struct cost_pair *cp;
4548 cp = ivs->cand_for_use[uid];
4549 if (!cp)
4550 return;
4551 cid = cp->cand->id;
4553 ivs->bad_uses++;
4554 ivs->cand_for_use[uid] = NULL;
4555 ivs->n_cand_uses[cid]--;
4557 if (ivs->n_cand_uses[cid] == 0)
4559 bitmap_clear_bit (ivs->cands, cid);
4560 /* Do not count the pseudocandidates. */
4561 if (cp->cand->iv)
4562 ivs->n_regs--;
4563 ivs->n_cands--;
4564 ivs->cand_cost -= cp->cand->cost;
4566 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4569 ivs->cand_use_cost -= cp->cost;
4571 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4572 iv_ca_recount_cost (data, ivs);
4575 /* Add invariants in set INVS to set IVS. */
4577 static void
4578 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4580 bitmap_iterator bi;
4581 unsigned iid;
4583 if (!invs)
4584 return;
4586 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4588 ivs->n_invariant_uses[iid]++;
4589 if (ivs->n_invariant_uses[iid] == 1)
4590 ivs->n_regs++;
4594 /* Set cost pair for USE in set IVS to CP. */
4596 static void
4597 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4598 struct iv_use *use, struct cost_pair *cp)
4600 unsigned uid = use->id, cid;
4602 if (ivs->cand_for_use[uid] == cp)
4603 return;
4605 if (ivs->cand_for_use[uid])
4606 iv_ca_set_no_cp (data, ivs, use);
4608 if (cp)
4610 cid = cp->cand->id;
4612 ivs->bad_uses--;
4613 ivs->cand_for_use[uid] = cp;
4614 ivs->n_cand_uses[cid]++;
4615 if (ivs->n_cand_uses[cid] == 1)
4617 bitmap_set_bit (ivs->cands, cid);
4618 /* Do not count the pseudocandidates. */
4619 if (cp->cand->iv)
4620 ivs->n_regs++;
4621 ivs->n_cands++;
4622 ivs->cand_cost += cp->cand->cost;
4624 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4627 ivs->cand_use_cost += cp->cost;
4628 iv_ca_set_add_invariants (ivs, cp->depends_on);
4629 iv_ca_recount_cost (data, ivs);
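/* A sketch of the intended usage (hypothetical caller).  iv_ca_set_cp
   is the single entry point for changing which candidate expresses a
   use; it retracts the old pair first, so the derived counters
   (n_regs, n_cands, cand_cost, cand_use_cost) stay consistent:

     struct cost_pair *cp = get_use_iv_cost (data, use, cand);
     if (cp)
       iv_ca_set_cp (data, ivs, use, cp);   -- express USE by CAND
     ...
     iv_ca_set_no_cp (data, ivs, use);      -- leave USE unexpressed

   Note that iv_ca_set_no_cp also bumps bad_uses, which makes the cost
   of the whole assignment infinite until the use is expressed again.  */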
4633 /* Extend set IVS by expressing USE by some of the candidates in it
4634 if possible. */
4636 static void
4637 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4638 struct iv_use *use)
4640 struct cost_pair *best_cp = NULL, *cp;
4641 bitmap_iterator bi;
4642 unsigned i;
4644 gcc_assert (ivs->upto >= use->id);
4646 if (ivs->upto == use->id)
4648 ivs->upto++;
4649 ivs->bad_uses++;
4652 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4654 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4656 if (cheaper_cost_pair (cp, best_cp))
4657 best_cp = cp;
4660 iv_ca_set_cp (data, ivs, use, best_cp);
4663 /* Get cost for assignment IVS. */
4665 static unsigned
4666 iv_ca_cost (struct iv_ca *ivs)
4668 return (ivs->bad_uses ? INFTY : ivs->cost);
4671 /* Returns true if all dependences of CP are among invariants in IVS. */
4673 static bool
4674 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4676 unsigned i;
4677 bitmap_iterator bi;
4679 if (!cp->depends_on)
4680 return true;
4682 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4684 if (ivs->n_invariant_uses[i] == 0)
4685 return false;
4688 return true;
4691 /* Creates a change record for expressing USE by NEW_CP instead of
4692 OLD_CP, and chains it before NEXT_CHANGE. */
4694 static struct iv_ca_delta *
4695 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4696 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4698 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4700 change->use = use;
4701 change->old_cp = old_cp;
4702 change->new_cp = new_cp;
4703 change->next_change = next_change;
4705 return change;
4708 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4709 are rewritten. */
4711 static struct iv_ca_delta *
4712 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4714 struct iv_ca_delta *last;
4716 if (!l2)
4717 return l1;
4719 if (!l1)
4720 return l2;
4722 for (last = l1; last->next_change; last = last->next_change)
4723 continue;
4724 last->next_change = l2;
4726 return l1;
4729 /* Returns the cost pair through which USE is expressed in IVS. */
4731 static struct cost_pair *
4732 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4734 return ivs->cand_for_use[use->id];
4737 /* Reverse the list of changes DELTA, forming its inverse. */
4739 static struct iv_ca_delta *
4740 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4742 struct iv_ca_delta *act, *next, *prev = NULL;
4743 struct cost_pair *tmp;
4745 for (act = delta; act; act = next)
4747 next = act->next_change;
4748 act->next_change = prev;
4749 prev = act;
4751 tmp = act->old_cp;
4752 act->old_cp = act->new_cp;
4753 act->new_cp = tmp;
4756 return prev;
4759 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4760 reverted instead. */
4762 static void
4763 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4764 struct iv_ca_delta *delta, bool forward)
4766 struct cost_pair *from, *to;
4767 struct iv_ca_delta *act;
4769 if (!forward)
4770 delta = iv_ca_delta_reverse (delta);
4772 for (act = delta; act; act = act->next_change)
4774 from = act->old_cp;
4775 to = act->new_cp;
4776 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4777 iv_ca_set_cp (data, ivs, act->use, to);
4780 if (!forward)
4781 iv_ca_delta_reverse (delta);
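/* A sketch of the usual life cycle of a delta list (hypothetical
   caller).  Changes are evaluated by committing them, reading the
   cost, and reverting; only the best delta is committed for good:

     struct iv_ca_delta *delta = NULL;

     delta = iv_ca_delta_add (use, old_cp, new_cp, delta);
     iv_ca_delta_commit (data, ivs, delta, true);    -- trial apply
     acost = iv_ca_cost (ivs);
     iv_ca_delta_commit (data, ivs, delta, false);   -- undo

     if (acost < best_cost)
       ...                                           -- keep DELTA for later
     else
       iv_ca_delta_free (&delta);

   iv_ca_extend and iv_ca_narrow below follow exactly this pattern.  */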
4784 /* Returns true if CAND is used in IVS. */
4786 static bool
4787 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4789 return ivs->n_cand_uses[cand->id] > 0;
4792 /* Returns number of induction variable candidates in the set IVS. */
4794 static unsigned
4795 iv_ca_n_cands (struct iv_ca *ivs)
4797 return ivs->n_cands;
4800 /* Free the list of changes DELTA. */
4802 static void
4803 iv_ca_delta_free (struct iv_ca_delta **delta)
4805 struct iv_ca_delta *act, *next;
4807 for (act = *delta; act; act = next)
4809 next = act->next_change;
4810 free (act);
4813 *delta = NULL;
4816 /* Allocates a new, empty assignment of iv candidates to uses. */
4818 static struct iv_ca *
4819 iv_ca_new (struct ivopts_data *data)
4821 struct iv_ca *nw = XNEW (struct iv_ca);
4823 nw->upto = 0;
4824 nw->bad_uses = 0;
4825 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4826 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4827 nw->cands = BITMAP_ALLOC (NULL);
4828 nw->n_cands = 0;
4829 nw->n_regs = 0;
4830 nw->cand_use_cost = 0;
4831 nw->cand_cost = 0;
4832 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4833 nw->cost = 0;
4835 return nw;
4838 /* Free memory occupied by the set IVS. */
4840 static void
4841 iv_ca_free (struct iv_ca **ivs)
4843 free ((*ivs)->cand_for_use);
4844 free ((*ivs)->n_cand_uses);
4845 BITMAP_FREE ((*ivs)->cands);
4846 free ((*ivs)->n_invariant_uses);
4847 free (*ivs);
4848 *ivs = NULL;
4851 /* Dumps IVS to FILE. */
4853 static void
4854 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4856 const char *pref = " invariants ";
4857 unsigned i;
4859 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4860 bitmap_print (file, ivs->cands, " candidates ","\n");
4862 for (i = 1; i <= data->max_inv_id; i++)
4863 if (ivs->n_invariant_uses[i])
4865 fprintf (file, "%s%d", pref, i);
4866 pref = ", ";
4868 fprintf (file, "\n");
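/* The output produced here looks roughly like the following
   (hypothetical numbers):

     cost 14
     candidates 0, 3
     invariants 2, 6  */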
4871 /* Try changing the candidate in IVS to CAND for each use. Return the cost
4872 of the new set, and store the differences in DELTA. The number of
4873 induction variables in the new set is stored in N_IVS. */
4875 static unsigned
4876 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4877 struct iv_cand *cand, struct iv_ca_delta **delta,
4878 unsigned *n_ivs)
4880 unsigned i, cost;
4881 struct iv_use *use;
4882 struct cost_pair *old_cp, *new_cp;
4884 *delta = NULL;
4885 for (i = 0; i < ivs->upto; i++)
4887 use = iv_use (data, i);
4888 old_cp = iv_ca_cand_for_use (ivs, use);
4890 if (old_cp
4891 && old_cp->cand == cand)
4892 continue;
4894 new_cp = get_use_iv_cost (data, use, cand);
4895 if (!new_cp)
4896 continue;
4898 if (!iv_ca_has_deps (ivs, new_cp))
4899 continue;
4901 if (!cheaper_cost_pair (new_cp, old_cp))
4902 continue;
4904 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4907 iv_ca_delta_commit (data, ivs, *delta, true);
4908 cost = iv_ca_cost (ivs);
4909 if (n_ivs)
4910 *n_ivs = iv_ca_n_cands (ivs);
4911 iv_ca_delta_commit (data, ivs, *delta, false);
4913 return cost;
4916 /* Try narrowing set IVS by removing CAND. Return the cost of
4917 the new set and store the differences in DELTA. */
4919 static unsigned
4920 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4921 struct iv_cand *cand, struct iv_ca_delta **delta)
4923 unsigned i, ci;
4924 struct iv_use *use;
4925 struct cost_pair *old_cp, *new_cp, *cp;
4926 bitmap_iterator bi;
4927 struct iv_cand *cnd;
4928 unsigned cost;
4930 *delta = NULL;
4931 for (i = 0; i < n_iv_uses (data); i++)
4933 use = iv_use (data, i);
4935 old_cp = iv_ca_cand_for_use (ivs, use);
4936 if (old_cp->cand != cand)
4937 continue;
4939 new_cp = NULL;
4941 if (data->consider_all_candidates)
4943 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4945 if (ci == cand->id)
4946 continue;
4948 cnd = iv_cand (data, ci);
4950 cp = get_use_iv_cost (data, use, cnd);
4951 if (!cp)
4952 continue;
4953 if (!iv_ca_has_deps (ivs, cp))
4954 continue;
4956 if (!cheaper_cost_pair (cp, new_cp))
4957 continue;
4959 new_cp = cp;
4962 else
4964 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4966 if (ci == cand->id)
4967 continue;
4969 cnd = iv_cand (data, ci);
4971 cp = get_use_iv_cost (data, use, cnd);
4972 if (!cp)
4973 continue;
4974 if (!iv_ca_has_deps (ivs, cp))
4975 continue;
4977 if (!cheaper_cost_pair (cp, new_cp))
4978 continue;
4980 new_cp = cp;
4984 if (!new_cp)
4986 iv_ca_delta_free (delta);
4987 return INFTY;
4990 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4993 iv_ca_delta_commit (data, ivs, *delta, true);
4994 cost = iv_ca_cost (ivs);
4995 iv_ca_delta_commit (data, ivs, *delta, false);
4997 return cost;
5000 /* Try optimizing the set of candidates IVS by removing candidates other
5001 than EXCEPT_CAND from it. Return the cost of the new set, and store
5002 the differences in DELTA. */
5004 static unsigned
5005 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5006 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5008 bitmap_iterator bi;
5009 struct iv_ca_delta *act_delta, *best_delta;
5010 unsigned i, best_cost, acost;
5011 struct iv_cand *cand;
5013 best_delta = NULL;
5014 best_cost = iv_ca_cost (ivs);
5016 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5018 cand = iv_cand (data, i);
5020 if (cand == except_cand)
5021 continue;
5023 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5025 if (acost < best_cost)
5027 best_cost = acost;
5028 iv_ca_delta_free (&best_delta);
5029 best_delta = act_delta;
5031 else
5032 iv_ca_delta_free (&act_delta);
5035 if (!best_delta)
5037 *delta = NULL;
5038 return best_cost;
5041 /* Recurse to possibly remove other unnecessary ivs. */
5042 iv_ca_delta_commit (data, ivs, best_delta, true);
5043 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5044 iv_ca_delta_commit (data, ivs, best_delta, false);
5045 *delta = iv_ca_delta_join (best_delta, *delta);
5046 return best_cost;
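/* The recursion above matters because removals enable one another: if
   narrowing moves the last uses of candidate C1 onto candidate C3,
   candidate C2 may only become removable once C1 is gone.  Re-pruning
   the committed set catches such second-order removals, and the two
   deltas are then joined into a single change list.  */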
5049 /* Tries to extend the set IVS in the best possible way in order
5050 to express USE. */
5052 static bool
5053 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5054 struct iv_use *use)
5056 unsigned best_cost, act_cost;
5057 unsigned i;
5058 bitmap_iterator bi;
5059 struct iv_cand *cand;
5060 struct iv_ca_delta *best_delta = NULL, *act_delta;
5061 struct cost_pair *cp;
5063 iv_ca_add_use (data, ivs, use);
5064 best_cost = iv_ca_cost (ivs);
5066 cp = iv_ca_cand_for_use (ivs, use);
5067 if (cp)
5069 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5070 iv_ca_set_no_cp (data, ivs, use);
5073 /* First try important candidates; only if that fails, try the specific ones.
5074 Rationale -- in loops with many variables the best choice often is to use
5075 just one generic biv. If we added here many ivs specific to the uses,
5076 the optimization algorithm later would be likely to get stuck in a local
5077 minimum, thus causing us to create too many ivs. The approach from
5078 few ivs to more seems more likely to be successful -- starting from few
5079 ivs, replacing an expensive use by a specific iv should always be a
5080 win. */
5081 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5083 cand = iv_cand (data, i);
5085 if (iv_ca_cand_used_p (ivs, cand))
5086 continue;
5088 cp = get_use_iv_cost (data, use, cand);
5089 if (!cp)
5090 continue;
5092 iv_ca_set_cp (data, ivs, use, cp);
5093 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5094 iv_ca_set_no_cp (data, ivs, use);
5095 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5097 if (act_cost < best_cost)
5099 best_cost = act_cost;
5101 iv_ca_delta_free (&best_delta);
5102 best_delta = act_delta;
5104 else
5105 iv_ca_delta_free (&act_delta);
5108 if (best_cost == INFTY)
5110 for (i = 0; i < use->n_map_members; i++)
5112 cp = use->cost_map + i;
5113 cand = cp->cand;
5114 if (!cand)
5115 continue;
5117 /* Already tried this. */
5118 if (cand->important)
5119 continue;
5121 if (iv_ca_cand_used_p (ivs, cand))
5122 continue;
5124 act_delta = NULL;
5125 iv_ca_set_cp (data, ivs, use, cp);
5126 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5127 iv_ca_set_no_cp (data, ivs, use);
5128 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5129 cp, act_delta);
5131 if (act_cost < best_cost)
5133 best_cost = act_cost;
5135 if (best_delta)
5136 iv_ca_delta_free (&best_delta);
5137 best_delta = act_delta;
5139 else
5140 iv_ca_delta_free (&act_delta);
5144 iv_ca_delta_commit (data, ivs, best_delta, true);
5145 iv_ca_delta_free (&best_delta);
5147 return (best_cost != INFTY);
5150 /* Finds an initial assignment of candidates to uses. */
5152 static struct iv_ca *
5153 get_initial_solution (struct ivopts_data *data)
5155 struct iv_ca *ivs = iv_ca_new (data);
5156 unsigned i;
5158 for (i = 0; i < n_iv_uses (data); i++)
5159 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5161 iv_ca_free (&ivs);
5162 return NULL;
5165 return ivs;
5168 /* Tries to improve set of induction variables IVS. */
5170 static bool
5171 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5173 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5174 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5175 struct iv_cand *cand;
5177 /* Try extending the set of induction variables by one. */
5178 for (i = 0; i < n_iv_cands (data); i++)
5180 cand = iv_cand (data, i);
5182 if (iv_ca_cand_used_p (ivs, cand))
5183 continue;
5185 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5186 if (!act_delta)
5187 continue;
5189 /* If we successfully added the candidate and the set is small enough,
5190 try optimizing it by removing other candidates. */
5191 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5193 iv_ca_delta_commit (data, ivs, act_delta, true);
5194 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5195 iv_ca_delta_commit (data, ivs, act_delta, false);
5196 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5199 if (acost < best_cost)
5201 best_cost = acost;
5202 iv_ca_delta_free (&best_delta);
5203 best_delta = act_delta;
5205 else
5206 iv_ca_delta_free (&act_delta);
5209 if (!best_delta)
5211 /* Try removing the candidates from the set instead. */
5212 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5214 /* Nothing more we can do. */
5215 if (!best_delta)
5216 return false;
5219 iv_ca_delta_commit (data, ivs, best_delta, true);
5220 gcc_assert (best_cost == iv_ca_cost (ivs));
5221 iv_ca_delta_free (&best_delta);
5222 return true;
5225 /* Attempts to find the optimal set of induction variables. We use a simple
5226 greedy heuristic -- we try to replace at most one candidate in the selected
5227 solution and remove the unused ivs while this improves the cost. */
5229 static struct iv_ca *
5230 find_optimal_iv_set (struct ivopts_data *data)
5232 unsigned i;
5233 struct iv_ca *set;
5234 struct iv_use *use;
5236 /* Get the initial solution. */
5237 set = get_initial_solution (data);
5238 if (!set)
5240 if (dump_file && (dump_flags & TDF_DETAILS))
5241 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5242 return NULL;
5245 if (dump_file && (dump_flags & TDF_DETAILS))
5247 fprintf (dump_file, "Initial set of candidates:\n");
5248 iv_ca_dump (data, dump_file, set);
5251 while (try_improve_iv_set (data, set))
5253 if (dump_file && (dump_flags & TDF_DETAILS))
5255 fprintf (dump_file, "Improved to:\n");
5256 iv_ca_dump (data, dump_file, set);
5260 if (dump_file && (dump_flags & TDF_DETAILS))
5261 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5263 for (i = 0; i < n_iv_uses (data); i++)
5265 use = iv_use (data, i);
5266 use->selected = iv_ca_cand_for_use (set, use)->cand;
5269 return set;
5272 /* Creates a new induction variable corresponding to CAND. */
5274 static void
5275 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5277 block_stmt_iterator incr_pos;
5278 tree base;
5279 bool after = false;
5281 if (!cand->iv)
5282 return;
5284 switch (cand->pos)
5286 case IP_NORMAL:
5287 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5288 break;
5290 case IP_END:
5291 incr_pos = bsi_last (ip_end_pos (data->current_loop));
5292 after = true;
5293 break;
5295 case IP_ORIGINAL:
5296 /* Mark that the iv is preserved. */
5297 name_info (data, cand->var_before)->preserve_biv = true;
5298 name_info (data, cand->var_after)->preserve_biv = true;
5300 /* Rewrite the increment so that it uses var_before directly. */
5301 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5303 return;
5306 gimple_add_tmp_var (cand->var_before);
5307 add_referenced_var (cand->var_before);
5309 base = unshare_expr (cand->iv->base);
5311 create_iv (base, unshare_expr (cand->iv->step),
5312 cand->var_before, data->current_loop,
5313 &incr_pos, after, &cand->var_before, &cand->var_after);
5316 /* Creates new induction variables described in SET. */
5318 static void
5319 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5321 unsigned i;
5322 struct iv_cand *cand;
5323 bitmap_iterator bi;
5325 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5327 cand = iv_cand (data, i);
5328 create_new_iv (data, cand);
5332 /* Removes statement STMT (a real statement or a phi node). If
5333 INCLUDING_DEFINED_NAME is true, also remove the ssa name defined by it. */
5335 static void
5336 remove_statement (tree stmt, bool including_defined_name)
5338 if (TREE_CODE (stmt) == PHI_NODE)
5340 if (!including_defined_name)
5342 /* Prevent the ssa name defined by the statement from being removed. */
5343 SET_PHI_RESULT (stmt, NULL);
5345 remove_phi_node (stmt, NULL_TREE);
5347 else
5349 block_stmt_iterator bsi = bsi_for_stmt (stmt);
5351 bsi_remove (&bsi, true);
5355 /* Rewrites USE (definition of iv used in a nonlinear expression)
5356 using candidate CAND. */
5358 static void
5359 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5360 struct iv_use *use, struct iv_cand *cand)
5362 tree comp;
5363 tree op, stmts, tgt, ass;
5364 block_stmt_iterator bsi, pbsi;
5366 /* An important special case -- if we are asked to express the value
5367 of the original iv by itself, just exit; there is no need to
5368 introduce a new computation (that might also need casting the
5369 variable to unsigned and back). */
5370 if (cand->pos == IP_ORIGINAL
5371 && cand->incremented_at == use->stmt)
5373 tree step, ctype, utype;
5374 enum tree_code incr_code = PLUS_EXPR;
5376 gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
5377 gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);
5379 step = cand->iv->step;
5380 ctype = TREE_TYPE (step);
5381 utype = TREE_TYPE (cand->var_after);
5382 if (TREE_CODE (step) == NEGATE_EXPR)
5384 incr_code = MINUS_EXPR;
5385 step = TREE_OPERAND (step, 0);
5388 /* Check whether we may leave the computation unchanged.
5389 This is the case only if it does not rely on other
5390 computations in the loop -- otherwise, the computation
5391 we rely upon may be removed in remove_unused_ivs,
5392 thus leading to an ICE. */
5393 op = TREE_OPERAND (use->stmt, 1);
5394 if (TREE_CODE (op) == PLUS_EXPR
5395 || TREE_CODE (op) == MINUS_EXPR)
5397 if (TREE_OPERAND (op, 0) == cand->var_before)
5398 op = TREE_OPERAND (op, 1);
5399 else if (TREE_CODE (op) == PLUS_EXPR
5400 && TREE_OPERAND (op, 1) == cand->var_before)
5401 op = TREE_OPERAND (op, 0);
5402 else
5403 op = NULL_TREE;
5405 else
5406 op = NULL_TREE;
5408 if (op
5409 && (TREE_CODE (op) == INTEGER_CST
5410 || operand_equal_p (op, step, 0)))
5411 return;
5413 /* Otherwise, add the necessary computations to express
5414 the iv. */
5415 op = fold_convert (ctype, cand->var_before);
5416 comp = fold_convert (utype,
5417 build2 (incr_code, ctype, op,
5418 unshare_expr (step)));
5420 else
5421 comp = get_computation (data->current_loop, use, cand);
5423 switch (TREE_CODE (use->stmt))
5425 case PHI_NODE:
5426 tgt = PHI_RESULT (use->stmt);
5428 /* If we should keep the biv, do not replace it. */
5429 if (name_info (data, tgt)->preserve_biv)
5430 return;
5432 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5433 while (!bsi_end_p (pbsi)
5434 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5436 bsi = pbsi;
5437 bsi_next (&pbsi);
5439 break;
5441 case MODIFY_EXPR:
5442 tgt = TREE_OPERAND (use->stmt, 0);
5443 bsi = bsi_for_stmt (use->stmt);
5444 break;
5446 default:
5447 gcc_unreachable ();
5450 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5452 if (TREE_CODE (use->stmt) == PHI_NODE)
5454 if (stmts)
5455 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5456 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5457 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5458 remove_statement (use->stmt, false);
5459 SSA_NAME_DEF_STMT (tgt) = ass;
5461 else
5463 if (stmts)
5464 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5465 TREE_OPERAND (use->stmt, 1) = op;
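/* A worked example (hypothetical GIMPLE).  For an original biv
   increment such as

     i_7 = i_3 + 1;

   expressed by the IP_ORIGINAL candidate incremented at this very
   statement, OP above is the constant 1 and the statement is left
   untouched.  If instead the increment were

     i_7 = i_3 + j_5;

   with j_5 computed from another iv inside the loop, OP matches
   neither an INTEGER_CST nor STEP, so the value is re-expressed from
   var_before; otherwise remove_unused_ivs might delete the definition
   of j_5 that the statement still relies on.  */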
5469 /* Replaces the ssa name at index IDX by its base variable. Callback for
5470 for_each_index. */
5472 static bool
5473 idx_remove_ssa_names (tree base, tree *idx,
5474 void *data ATTRIBUTE_UNUSED)
5476 tree *op;
5478 if (TREE_CODE (*idx) == SSA_NAME)
5479 *idx = SSA_NAME_VAR (*idx);
5481 if (TREE_CODE (base) == ARRAY_REF)
5483 op = &TREE_OPERAND (base, 2);
5484 if (*op
5485 && TREE_CODE (*op) == SSA_NAME)
5486 *op = SSA_NAME_VAR (*op);
5487 op = &TREE_OPERAND (base, 3);
5488 if (*op
5489 && TREE_CODE (*op) == SSA_NAME)
5490 *op = SSA_NAME_VAR (*op);
5493 return true;
5496 /* Unshares REF and replaces the ssa names inside it by their base variables. */
5498 static tree
5499 unshare_and_remove_ssa_names (tree ref)
5501 ref = unshare_expr (ref);
5502 for_each_index (&ref, idx_remove_ssa_names, NULL);
5504 return ref;
5507 /* Extract the alias analysis info for the memory reference REF. There are
5508 several ways this information may be stored, and its precise semantics
5509 depends on the type of the reference, but there is always one _DECL node
5510 hidden somewhere that is used to determine the set of virtual operands
5511 for the reference. The code below deciphers this jungle and extracts
5512 that single useful piece of information. */
5514 static tree
5515 get_ref_tag (tree ref, tree orig)
5517 tree var = get_base_address (ref);
5518 tree aref = NULL_TREE, tag, sv;
5519 HOST_WIDE_INT offset, size, maxsize;
5521 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5523 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5524 if (ref == aref)
5525 break;
5528 if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5529 return unshare_expr (sv);
5531 if (!var)
5532 return NULL_TREE;
5534 if (TREE_CODE (var) == INDIRECT_REF)
5536 /* If the base is a dereference of a pointer, first check its name memory
5537 tag. If it does not have one, use its symbol memory tag. */
5538 var = TREE_OPERAND (var, 0);
5539 if (TREE_CODE (var) != SSA_NAME)
5540 return NULL_TREE;
5542 if (SSA_NAME_PTR_INFO (var))
5544 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5545 if (tag)
5546 return tag;
5549 var = SSA_NAME_VAR (var);
5550 tag = var_ann (var)->symbol_mem_tag;
5551 gcc_assert (tag != NULL_TREE);
5552 return tag;
5554 else
5556 if (!DECL_P (var))
5557 return NULL_TREE;
5559 tag = var_ann (var)->symbol_mem_tag;
5560 if (tag)
5561 return tag;
5563 return var;
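/* For example (hypothetical names): for a reference *p_1 the tag comes
   from the pointer -- the name memory tag of p_1 if points-to
   information is available, the symbol memory tag of its base variable
   otherwise.  For a direct reference such as a[i_4] the tag is the
   symbol memory tag of the _DECL 'a', or 'a' itself if no tag was
   assigned.  */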
5567 /* Copies the reference information from OLD_REF to NEW_REF. */
5569 static void
5570 copy_ref_info (tree new_ref, tree old_ref)
5572 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5573 copy_mem_ref_info (new_ref, old_ref);
5574 else
5576 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5577 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5581 /* Rewrites USE (address that is an iv) using candidate CAND. */
5583 static void
5584 rewrite_use_address (struct ivopts_data *data,
5585 struct iv_use *use, struct iv_cand *cand)
5587 struct affine_tree_combination aff;
5588 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5589 tree ref;
5591 get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5592 unshare_aff_combination (&aff);
5594 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5595 copy_ref_info (ref, *use->op_p);
5596 *use->op_p = ref;
5599 /* Rewrites USE (a condition in which one of the arguments is an iv) using
5600 candidate CAND. */
5602 static void
5603 rewrite_use_compare (struct ivopts_data *data,
5604 struct iv_use *use, struct iv_cand *cand)
5606 tree comp;
5607 tree *op_p, cond, op, stmts, bound;
5608 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5609 enum tree_code compare;
5610 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5612 bound = cp->value;
5613 if (bound)
5615 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5616 tree var_type = TREE_TYPE (var);
5618 compare = iv_elimination_compare (data, use);
5619 bound = fold_convert (var_type, bound);
5620 op = force_gimple_operand (unshare_expr (bound), &stmts,
5621 true, NULL_TREE);
5623 if (stmts)
5624 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5626 *use->op_p = build2 (compare, boolean_type_node, var, op);
5627 update_stmt (use->stmt);
5628 return;
5631 /* The induction variable elimination failed; just express the original
5632 giv. */
5633 comp = get_computation (data->current_loop, use, cand);
5635 cond = *use->op_p;
5636 op_p = &TREE_OPERAND (cond, 0);
5637 if (TREE_CODE (*op_p) != SSA_NAME
5638 || zero_p (get_iv (data, *op_p)->step))
5639 op_p = &TREE_OPERAND (cond, 1);
5641 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5642 if (stmts)
5643 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5645 *op_p = op;
5648 /* Rewrites USE using candidate CAND. */
5650 static void
5651 rewrite_use (struct ivopts_data *data,
5652 struct iv_use *use, struct iv_cand *cand)
5654 switch (use->type)
5656 case USE_NONLINEAR_EXPR:
5657 rewrite_use_nonlinear_expr (data, use, cand);
5658 break;
5660 case USE_ADDRESS:
5661 rewrite_use_address (data, use, cand);
5662 break;
5664 case USE_COMPARE:
5665 rewrite_use_compare (data, use, cand);
5666 break;
5668 default:
5669 gcc_unreachable ();
5671 mark_new_vars_to_rename (use->stmt);
5674 /* Rewrite the uses using the selected induction variables. */
5676 static void
5677 rewrite_uses (struct ivopts_data *data)
5679 unsigned i;
5680 struct iv_cand *cand;
5681 struct iv_use *use;
5683 for (i = 0; i < n_iv_uses (data); i++)
5685 use = iv_use (data, i);
5686 cand = use->selected;
5687 gcc_assert (cand);
5689 rewrite_use (data, use, cand);
5693 /* Removes the ivs that are not used after rewriting. */
5695 static void
5696 remove_unused_ivs (struct ivopts_data *data)
5698 unsigned j;
5699 bitmap_iterator bi;
5701 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5703 struct version_info *info;
5705 info = ver_info (data, j);
5706 if (info->iv
5707 && !zero_p (info->iv->step)
5708 && !info->inv_id
5709 && !info->iv->have_use_for
5710 && !info->preserve_biv)
5711 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5715 /* Frees data allocated by the optimization of a single loop. */
5717 static void
5718 free_loop_data (struct ivopts_data *data)
5720 unsigned i, j;
5721 bitmap_iterator bi;
5722 tree obj;
5724 htab_empty (data->niters);
5726 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5728 struct version_info *info;
5730 info = ver_info (data, i);
5731 if (info->iv)
5732 free (info->iv);
5733 info->iv = NULL;
5734 info->has_nonlin_use = false;
5735 info->preserve_biv = false;
5736 info->inv_id = 0;
5738 bitmap_clear (data->relevant);
5739 bitmap_clear (data->important_candidates);
5741 for (i = 0; i < n_iv_uses (data); i++)
5743 struct iv_use *use = iv_use (data, i);
5745 free (use->iv);
5746 BITMAP_FREE (use->related_cands);
5747 for (j = 0; j < use->n_map_members; j++)
5748 if (use->cost_map[j].depends_on)
5749 BITMAP_FREE (use->cost_map[j].depends_on);
5750 free (use->cost_map);
5751 free (use);
5753 VEC_truncate (iv_use_p, data->iv_uses, 0);
5755 for (i = 0; i < n_iv_cands (data); i++)
5757 struct iv_cand *cand = iv_cand (data, i);
5759 if (cand->iv)
5760 free (cand->iv);
5761 if (cand->depends_on)
5762 BITMAP_FREE (cand->depends_on);
5763 free (cand);
5765 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5767 if (data->version_info_size < num_ssa_names)
5769 data->version_info_size = 2 * num_ssa_names;
5770 free (data->version_info);
5771 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5774 data->max_inv_id = 0;
5776 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5777 SET_DECL_RTL (obj, NULL_RTX);
5779 VEC_truncate (tree, decl_rtl_to_reset, 0);
5782 /* Finalizes the data structures used by the iv optimization pass,
5783 freeing everything recorded in DATA. */
5785 static void
5786 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5788 free_loop_data (data);
5789 free (data->version_info);
5790 BITMAP_FREE (data->relevant);
5791 BITMAP_FREE (data->important_candidates);
5792 htab_delete (data->niters);
5794 VEC_free (tree, heap, decl_rtl_to_reset);
5795 VEC_free (iv_use_p, heap, data->iv_uses);
5796 VEC_free (iv_cand_p, heap, data->iv_candidates);
5799 /* Optimizes the LOOP. Returns true if anything changed. */
5801 static bool
5802 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5804 bool changed = false;
5805 struct iv_ca *iv_ca;
5806 edge exit;
5808 data->current_loop = loop;
5810 if (dump_file && (dump_flags & TDF_DETAILS))
5812 fprintf (dump_file, "Processing loop %d\n", loop->num);
5814 exit = single_dom_exit (loop);
5815 if (exit)
5817 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5818 exit->src->index, exit->dest->index);
5819 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5820 fprintf (dump_file, "\n");
5823 fprintf (dump_file, "\n");
5826 /* For each ssa name determines whether it behaves as an induction variable
5827 in some loop. */
5828 if (!find_induction_variables (data))
5829 goto finish;
5831 /* Finds interesting uses (item 1). */
5832 find_interesting_uses (data);
5833 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5834 goto finish;
5836 /* Finds candidates for the induction variables (item 2). */
5837 find_iv_candidates (data);
5839 /* Calculates the costs (item 3, part 1). */
5840 determine_use_iv_costs (data);
5841 determine_iv_costs (data);
5842 determine_set_costs (data);
5844 /* Find the optimal set of induction variables (item 3, part 2). */
5845 iv_ca = find_optimal_iv_set (data);
5846 if (!iv_ca)
5847 goto finish;
5848 changed = true;
5850 /* Create the new induction variables (item 4, part 1). */
5851 create_new_ivs (data, iv_ca);
5852 iv_ca_free (&iv_ca);
5854 /* Rewrite the uses (item 4, part 2). */
5855 rewrite_uses (data);
5857 /* Remove the ivs that are unused after rewriting. */
5858 remove_unused_ivs (data);
5860 /* We have changed the structure of induction variables; it might happen
5861 that definitions in the scev database refer to some of them that were
5862 eliminated. */
5863 scev_reset ();
5865 finish:
5866 free_loop_data (data);
5868 return changed;
5871 /* Main entry point. Optimizes induction variables in LOOPS. */
5873 void
5874 tree_ssa_iv_optimize (struct loops *loops)
5876 struct loop *loop;
5877 struct ivopts_data data;
5879 tree_ssa_iv_optimize_init (&data);
5881 /* Optimize the loops starting with the innermost ones. */
5882 loop = loops->tree_root;
5883 while (loop->inner)
5884 loop = loop->inner;
5886 /* Scan the loops, inner ones first. */
5887 while (loop != loops->tree_root)
5889 if (dump_file && (dump_flags & TDF_DETAILS))
5890 flow_loop_dump (loop, dump_file, NULL, 1);
5892 tree_ssa_iv_optimize_loop (&data, loop);
5894 if (loop->next)
5896 loop = loop->next;
5897 while (loop->inner)
5898 loop = loop->inner;
5900 else
5901 loop = loop->outer;
5904 tree_ssa_iv_optimize_finalize (&data);
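/* For example, for the loop nest (hypothetical numbering)

     loop 1
       loop 2
       loop 3
         loop 4

   the walk above processes the loops in the order 2, 4, 3, 1 -- every
   loop is optimized after all of the loops contained in it.  */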