/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
	 induction variable in the set and adds its cost to the sum.  The cost
	 reflects the time spent on modifying the induction variable's value
	 to be usable for the given purpose (adding base and offset for
	 arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */

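/* An editorial illustration (not part of the original sources; all variable
   names below are hypothetical).  Given a source loop such as

       for (i = 0; i < n; i++)
	 a[i] = 0;

   the address use &a[i] implies multiplying i by the element size on every
   iteration.  Strength reduction replaces it with a pointer candidate that
   is simply incremented, schematically:

       p = &a[0];
       end = &a[n];
       for (; p != end; p++)
	 *p = 0;

   which also allows eliminating the original counter i when it has no
   other uses.  The actual transformation is performed on GIMPLE and is
   driven by the cost model described above.  */
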
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "varray.h"
#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier of that use, if so.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  unsigned inv_id;	/* Id of an invariant.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  unsigned cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  tree stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_ORIGINAL		/* The original biv.  */
};

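/* An editorial sketch (not from the original sources) of where the two
   synthesized increment positions fall, assuming the usual loop shape with
   the exit test at the end of the body:

       loop:
	 ...
	 ivtmp.a = ivtmp.b + step;   <- IP_NORMAL: just before the exit test
	 if (cond)
	   goto exit;
       latch:
	 ivtmp.a = ivtmp.b + step;   <- IP_END: at the end of the latch block
	 goto loop;

   A candidate is computed at exactly one of these positions; IP_ORIGINAL
   marks candidates that keep the increment of the original biv in place.  */
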
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  enum iv_position pos;	/* Where it is computed.  */
  tree incremented_at;	/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* Numbers of iterations for all exits of the current loop.  */
  htab_t niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  unsigned cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  unsigned cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

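/* Editorial note: these bounds are read from params.def and should be
   tunable from the command line, assuming the usual mapping of the
   PARAM_IV_* identifiers to --param names, e.g.

       gcc -O2 --param iv-consider-all-candidates-bound=40
	       --param iv-max-considered-uses=250 file.c  */
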
/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_generic_expr (file, use->stmt, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
{
  basic_block cand_bb = bb_for_stmt (cand->incremented_at);
  basic_block stmt_bb = bb_for_stmt (stmt);
  block_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  */
  for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
    {
      if (bsi_stmt (bsi) == cand->incremented_at)
	return false;
      if (bsi_stmt (bsi) == stmt)
	return true;
    }
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
      return stmt_after_ip_original_pos (cand, stmt);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class class;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  class = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  switch (class)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Element of the table in which we cache the numbers of iterations obtained
   from exits of the loop.  */

struct nfe_cache_elt
{
  /* The edge for which the number of iterations is cached.  */
  edge exit;

  /* Number of iterations corresponding to this exit, or NULL if it cannot be
     determined.  */
  tree niter;
};

/* Hash function for nfe_cache_elt E.  */

static hashval_t
nfe_hash (const void *e)
{
  const struct nfe_cache_elt *elt = e;

  return htab_hash_pointer (elt->exit);
}

/* Equality function for nfe_cache_elt E1 and edge E2.  */

static int
nfe_eq (const void *e1, const void *e2)
{
  const struct nfe_cache_elt *elt1 = e1;

  return elt1->exit == e2;
}

/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct nfe_cache_elt *nfe_desc;
  struct tree_niter_desc desc;
  PTR *slot;

  slot = htab_find_slot_with_hash (data->niters, exit,
				   htab_hash_pointer (exit),
				   INSERT);

  if (!*slot)
    {
      nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
      nfe_desc->exit = exit;

      /* Try to determine number of iterations.  We must know it
	 unconditionally (i.e., without possibility of # of iterations
	 being zero).  Also, we cannot safely work with ssa names that
	 appear in phi nodes on abnormal edges, so that we do not create
	 overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
				     exit, &desc, true)
	  && integer_zerop (desc.may_be_zero)
	  && !contains_abnormal_ssa_name_p (desc.niter))
	nfe_desc->niter = desc.niter;
      else
	nfe_desc->niter = NULL_TREE;

      /* Enter the new element into the cache.  */
      *slot = nfe_desc;
    }
  else
    nfe_desc = *slot;

  return nfe_desc->niter;
}

/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = htab_create (10, nfe_hash, nfe_eq, free);
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj, op0, op1;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (TREE_CODE (expr) == NOP_EXPR
      || TREE_CODE (expr) == CONVERT_EXPR)
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = determine_base_object (TREE_OPERAND (expr, 0));
      op1 = determine_base_object (TREE_OPERAND (expr, 1));

      if (!op1)
	return op0;

      if (!op0)
	return (code == PLUS_EXPR
		? op1
		: fold_build1 (NEGATE_EXPR, ptr_type_node, op1));

      return fold_build2 (code, ptr_type_node, op0, op1);

    default:
      return fold_convert (ptr_type_node, expr);
    }
}

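/* Editorial examples of the mapping above (hypothetical trees): for
   EXPR == &a[0] + 4 the base object is &a; for EXPR == p + i, where p is a
   pointer and i an integer, the integer operand yields NULL_TREE and p
   itself becomes the base object; for an INTEGER_CST (e.g. a literal
   address of 0) there is no base object and NULL_TREE is returned.  */
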
/* Allocates an induction variable with given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (tree phi)
{
  struct loop *loop = bb_for_stmt (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, phi, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  tree phi, step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	step = fold_convert (type, step);

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  tree phi, var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
    return false;

  lhs = GIMPLE_STMT_OPERAND (stmt, 0);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, tree stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  block_stmt_iterator bsi;

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    find_givs_in_stmt (data, bsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter, TDF_SLIM);
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    tree stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name (unless the caller already did so), to avoid
     showing it in the dumps.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  tree stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (TREE_CODE (stmt) == PHI_NODE
	      || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition *COND_P, checks whether it is a compare of an induction
   variable and an invariant.  If this is the case, CONTROL_VAR is set
   to location of the iv, BOUND to the location of the invariant,
   IV_VAR and IV_BOUND are set to the corresponding induction variable
   descriptions, and true is returned.  If this is not the case,
   CONTROL_VAR and BOUND are set to the arguments of the condition and
   false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, tree *cond_p,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The nodes returned when COND has just one operand.  Note that you should
     not modify anything in BOUND or IV_BOUND because of this.  */
  static struct iv const_iv;
  static tree zero;
  tree cond = *cond_p;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (cond) == SSA_NAME)
    {
      op0 = cond_p;
      iv0 = get_iv (data, cond);
      ret = (iv0 && !integer_zerop (iv0->step));
      goto end;
    }

  if (!COMPARISON_CLASS_P (cond))
    {
      op0 = cond_p;
      goto end;
    }

  op0 = &TREE_OPERAND (cond, 0);
  op1 = &TREE_OPERAND (cond, 1);
  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

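/* An editorial usage sketch: for a condition such as "n > i", where i is an
   induction variable and n is loop invariant, the swap above puts the
   control variable in place, so *CONTROL_VAR ends up pointing to i, *BOUND
   to n, and true is returned.  For "i != j" with two ivs of nonzero step,
   the final test fails and false is returned instead.  */
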
/* Checks whether the condition *COND_P in STMT is interesting
   and if so, records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, cond_p, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
    return false;

  len = TREE_CODE_LENGTH (TREE_CODE (expr));
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  tree stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

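/* An editorial example of the accumulation above (hypothetical values): for
   a reference a[i] with 4-byte elements where i steps by 1 per iteration,
   the callback multiplies the element size by the converted iv step and
   adds 4 to DTA->step, while *idx itself is replaced by the initial value
   of i; the caller is then left with a loop-invariant base and a single
   cumulated byte step for the whole address.  */
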
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *data)
{
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* Returns true if memory reference REF may be unaligned.  */

static bool
may_be_unaligned_p (tree ref)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode
      && (base_align < GET_MODE_ALIGNMENT (mode)
	  || bitpos % GET_MODE_ALIGNMENT (mode) != 0
	  || bitpos % BITS_PER_UNIT != 0))
    return true;

  return false;
}

/* Return true if EXPR may be non-addressable.  */

static bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      return AGGREGATE_TYPE_P (TREE_TYPE (expr))
	     && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = *op_p, step = build_int_cst (sizetype, 0);
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (stmt_ann (stmt)->has_volatile_ops)
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  if (may_be_nonaddressable_p (base))
    goto fail;

  if (STRICT_ALIGNMENT
      && may_be_unaligned_p (base))
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = build_int_cst (sizetype, 0);
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == INDIRECT_REF)
	    *ref = fold_indirect_ref (*ref);
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, tree stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
{
  struct iv *iv;
  tree op, lhs, rhs;
  ssa_op_iter iter;
  use_operand_p use_p;

  find_invariants_stmt (data, stmt);

  if (TREE_CODE (stmt) == COND_EXPR)
    {
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
      return;
    }

  if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
    {
      lhs = GIMPLE_STMT_OPERAND (stmt, 0);
      rhs = GIMPLE_STMT_OPERAND (stmt, 1);

      if (TREE_CODE (lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
	{
	case tcc_comparison:
	  find_interesting_uses_cond (data, stmt,
				      &GIMPLE_STMT_OPERAND (stmt, 1));
	  return;

	case tcc_reference:
	  find_interesting_uses_address (data, stmt,
					 &GIMPLE_STMT_OPERAND (stmt, 1));
	  if (REFERENCE_CLASS_P (lhs))
	    find_interesting_uses_address (data, stmt,
					   &GIMPLE_STMT_OPERAND (stmt, 0));
	  return;

	default: ;
	}

      if (REFERENCE_CLASS_P (lhs)
	  && is_gimple_val (rhs))
	{
	  find_interesting_uses_address (data, stmt,
					 &GIMPLE_STMT_OPERAND (stmt, 0));
	  find_interesting_uses_op (data, rhs);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (TREE_CODE (stmt) == PHI_NODE
      && bb_for_stmt (stmt) == data->current_loop->header)
    {
      lhs = PHI_RESULT (stmt);
      iv = get_iv (data, lhs);

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  tree phi, def;

  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
    {
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  block_stmt_iterator bsi;
  tree phi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
	find_interesting_uses_stmt (data, phi);
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	find_interesting_uses_stmt (data, bsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == PLUS_EXPR)
	    expr = op1;
	  else
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside an address, we might strip the top level component references,
     thus changing the type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}

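/* An editorial usage sketch with hypothetical trees: for EXPR == &a[i + 3]
   where the elements of a are 4 bytes wide, strip_offset returns &a[i] and
   stores 12 to *OFFSET; when EXPR carries no constant displacement, EXPR
   itself is returned and *OFFSET is set to 0.  */
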
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}

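/* Editorial examples, assuming a target where int is 32 bits: for "int"
   this returns "unsigned int"; for "unsigned int" the type is returned
   unchanged; for a pointer type an unsigned integer type of the same
   precision is returned, so candidate arithmetic avoids signed
   overflow.  */
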
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}

/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, tree incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at)
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0))
	break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}

/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting the latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to the scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}

/* Adds a candidate BASE + STEP * i.  The important field is set to IMPORTANT.
   If USE is not NULL, the candidate is set as related to it.  The candidate
   computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
	       tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
}

/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
				     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
		 true, NULL);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}

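/* An editorial example: on a 64-bit target where int is 32 bits and
   BITS_PER_WORD is 64, this adds the candidate {0, +, 1} both in a 32-bit
   and in a 64-bit unsigned type, i.e. plain counters running from zero;
   on a 32-bit target only the 32-bit counter is added.  */
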
/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  tree phi, def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  add_candidate (data,
		 build_int_cst (TREE_TYPE (iv->base), 0),
		 iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (TREE_CODE (phi) == PHI_NODE)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
			      iv->base, iv->step, true, IP_ORIGINAL, NULL,
			      SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}

/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
	add_old_iv_candidates (data, iv);
    }
}

/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
			 struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
		 iv->step, true, use);

  /* Third, try removing the constant offset.  */
  base = strip_offset (iv->base, &offset);
  if (offset)
    add_candidate (data, base, iv->step, false, use);
}

/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
	continue;

      switch (use->type)
	{
	case USE_NONLINEAR_EXPR:
	case USE_COMPARE:
	case USE_ADDRESS:
	  /* Just add the ivs based on the value of the iv used here.  */
	  add_iv_value_candidates (data, use->iv, use);
	  break;

	default:
	  gcc_unreachable ();
	}
    }
}

/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
	bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
				   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
	 so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);
	  BITMAP_FREE (use->related_cands);
	}
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
	bitmap_ior_into (iv_use (data, i)->related_cands,
			 data->important_candidates);
    }
}

/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}

2269 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2270 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2271 we allocate a simple list for every use. */
2273 static void
2274 alloc_use_cost_map (struct ivopts_data *data)
2276 unsigned i, size, s, j;
2278 for (i = 0; i < n_iv_uses (data); i++)
2280 struct iv_use *use = iv_use (data, i);
2281 bitmap_iterator bi;
2283 if (data->consider_all_candidates)
2284 size = n_iv_cands (data);
2285 else
2287 s = 0;
2288 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2290 s++;
2293 /* Round up to a power of two, so that computing the modulo by it is fast. */
2294 for (size = 1; size < s; size <<= 1)
2295 continue;
2298 use->n_map_members = size;
2299 use->cost_map = XCNEWVEC (struct cost_pair, size);
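
/* A standalone sketch (not part of this file; round_up_pow2 is a
   hypothetical helper) of why the rounding above helps: for unsigned x
   and a power-of-two size, x % size equals x & (size - 1), so the hash
   probes below need no division.  */

#include <assert.h>

static unsigned
round_up_pow2 (unsigned s)
{
  unsigned size;

  /* Same loop shape as in alloc_use_cost_map above.  */
  for (size = 1; size < s; size <<= 1)
    continue;
  return size;
}

int
main (void)
{
  unsigned size = round_up_pow2 (5);   /* 8 */

  assert (size == 8);
  assert (13 % size == (13 & (size - 1)));   /* both are 5 */
  return 0;
}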
2303 /* Sets the cost of the (USE, CANDIDATE) pair to COST, and records that it
2304 depends on invariants DEPENDS_ON and that the value used in expressing it
2305 is VALUE. */
2307 static void
2308 set_use_iv_cost (struct ivopts_data *data,
2309 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2310 bitmap depends_on, tree value)
2312 unsigned i, s;
2314 if (cost == INFTY)
2316 BITMAP_FREE (depends_on);
2317 return;
2320 if (data->consider_all_candidates)
2322 use->cost_map[cand->id].cand = cand;
2323 use->cost_map[cand->id].cost = cost;
2324 use->cost_map[cand->id].depends_on = depends_on;
2325 use->cost_map[cand->id].value = value;
2326 return;
2329 /* n_map_members is a power of two, so this computes modulo. */
2330 s = cand->id & (use->n_map_members - 1);
2331 for (i = s; i < use->n_map_members; i++)
2332 if (!use->cost_map[i].cand)
2333 goto found;
2334 for (i = 0; i < s; i++)
2335 if (!use->cost_map[i].cand)
2336 goto found;
2338 gcc_unreachable ();
2340 found:
2341 use->cost_map[i].cand = cand;
2342 use->cost_map[i].cost = cost;
2343 use->cost_map[i].depends_on = depends_on;
2344 use->cost_map[i].value = value;
2347 /* Gets cost of (USE, CANDIDATE) pair. */
2349 static struct cost_pair *
2350 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2351 struct iv_cand *cand)
2353 unsigned i, s;
2354 struct cost_pair *ret;
2356 if (!cand)
2357 return NULL;
2359 if (data->consider_all_candidates)
2361 ret = use->cost_map + cand->id;
2362 if (!ret->cand)
2363 return NULL;
2365 return ret;
2368 /* n_map_members is a power of two, so this computes modulo. */
2369 s = cand->id & (use->n_map_members - 1);
2370 for (i = s; i < use->n_map_members; i++)
2371 if (use->cost_map[i].cand == cand)
2372 return use->cost_map + i;
2374 for (i = 0; i < s; i++)
2375 if (use->cost_map[i].cand == cand)
2376 return use->cost_map + i;
2378 return NULL;
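
/* A standalone sketch (hypothetical types and names, not GCC code) of the
   probing scheme used by set_use_iv_cost and get_use_iv_cost above: open
   addressing with linear probing over a power-of-two table, starting at
   slot id & (n - 1), scanning to the end of the table and then wrapping
   around to the beginning.  */

#include <assert.h>
#include <stddef.h>

struct slot { int key; int cost; };   /* key == 0 means an empty slot */

static struct slot *
probe (struct slot *table, unsigned n, int key)
{
  unsigned i, s = (unsigned) key & (n - 1);

  for (i = s; i < n; i++)
    if (table[i].key == key || table[i].key == 0)
      return &table[i];
  for (i = 0; i < s; i++)
    if (table[i].key == key || table[i].key == 0)
      return &table[i];
  return NULL;   /* Table full; the code above asserts this cannot happen.  */
}

int
main (void)
{
  struct slot table[4] = { { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } };
  struct slot *p = probe (table, 4, 7);   /* starts at slot 7 & 3 == 3 */

  p->key = 7;
  p->cost = 42;
  assert (probe (table, 4, 7)->cost == 42);
  return 0;
}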
2381 /* Returns an estimate of the cost of computing SEQ. */
2383 static unsigned
2384 seq_cost (rtx seq)
2386 unsigned cost = 0;
2387 rtx set;
2389 for (; seq; seq = NEXT_INSN (seq))
2391 set = single_set (seq);
2392 if (set)
2393 cost += rtx_cost (set, SET);
2394 else
2395 cost++;
2398 return cost;
2401 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory. */
2402 static rtx
2403 produce_memory_decl_rtl (tree obj, int *regno)
2405 rtx x;
2407 gcc_assert (obj);
2408 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2410 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2411 x = gen_rtx_SYMBOL_REF (Pmode, name);
2413 else
2414 x = gen_raw_REG (Pmode, (*regno)++);
2416 return gen_rtx_MEM (DECL_MODE (obj), x);
2419 /* Prepares decl_rtl for the variables referred to in *EXPR_P.  Callback for
2420 walk_tree. DATA contains the actual fake register number. */
2422 static tree
2423 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2425 tree obj = NULL_TREE;
2426 rtx x = NULL_RTX;
2427 int *regno = data;
2429 switch (TREE_CODE (*expr_p))
2431 case ADDR_EXPR:
2432 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2433 handled_component_p (*expr_p);
2434 expr_p = &TREE_OPERAND (*expr_p, 0))
2435 continue;
2436 obj = *expr_p;
2437 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2438 x = produce_memory_decl_rtl (obj, regno);
2439 break;
2441 case SSA_NAME:
2442 *ws = 0;
2443 obj = SSA_NAME_VAR (*expr_p);
2444 if (!DECL_RTL_SET_P (obj))
2445 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2446 break;
2448 case VAR_DECL:
2449 case PARM_DECL:
2450 case RESULT_DECL:
2451 *ws = 0;
2452 obj = *expr_p;
2454 if (DECL_RTL_SET_P (obj))
2455 break;
2457 if (DECL_MODE (obj) == BLKmode)
2458 x = produce_memory_decl_rtl (obj, regno);
2459 else
2460 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2462 break;
2464 default:
2465 break;
2468 if (x)
2470 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2471 SET_DECL_RTL (obj, x);
2474 return NULL_TREE;
2477 /* Determines cost of the computation of EXPR. */
2479 static unsigned
2480 computation_cost (tree expr)
2482 rtx seq, rslt;
2483 tree type = TREE_TYPE (expr);
2484 unsigned cost;
2485 /* Avoid using hard regs in ways which may be unsupported. */
2486 int regno = LAST_VIRTUAL_REGISTER + 1;
2488 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2489 start_sequence ();
2490 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2491 seq = get_insns ();
2492 end_sequence ();
2494 cost = seq_cost (seq);
2495 if (MEM_P (rslt))
2496 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2498 return cost;
2501 /* Returns variable containing the value of candidate CAND at statement AT. */
2503 static tree
2504 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2506 if (stmt_after_increment (loop, cand, stmt))
2507 return cand->var_after;
2508 else
2509 return cand->var_before;
2512 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2513 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2515 int
2516 tree_int_cst_sign_bit (tree t)
2518 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2519 unsigned HOST_WIDE_INT w;
2521 if (bitno < HOST_BITS_PER_WIDE_INT)
2522 w = TREE_INT_CST_LOW (t);
2523 else
2525 w = TREE_INT_CST_HIGH (t);
2526 bitno -= HOST_BITS_PER_WIDE_INT;
2529 return (w >> bitno) & 1;
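
/* A standalone analogue (hypothetical, using fixed 32-bit words in place
   of the HOST_WIDE_INT halves) of the bit selection above: the sign bit
   of a constant of PRECISION bits lives either in the low word or, for
   wide precisions, in the high word at a shifted position.  */

#include <assert.h>
#include <stdint.h>

static int
sign_bit (uint32_t low, uint32_t high, unsigned precision)
{
  unsigned bitno = precision - 1;
  uint32_t w;

  if (bitno < 32)
    w = low;
  else
    {
      w = high;
      bitno -= 32;
    }
  return (w >> bitno) & 1;
}

int
main (void)
{
  assert (sign_bit (0x80000000u, 0, 32) == 1);      /* negative 32-bit value */
  assert (sign_bit (0xffffffffu, 0x7fu, 40) == 0);  /* positive 40-bit value */
  return 0;
}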
2532 /* If we can prove that TOP = cst * BOT for some constant cst,
2533 store cst to MUL and return true. Otherwise return false.
2534 The returned value is always sign-extended, regardless of the
2535 signedness of TOP and BOT. */
2537 static bool
2538 constant_multiple_of (tree top, tree bot, double_int *mul)
2540 tree mby;
2541 enum tree_code code;
2542 double_int res, p0, p1;
2543 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2545 STRIP_NOPS (top);
2546 STRIP_NOPS (bot);
2548 if (operand_equal_p (top, bot, 0))
2550 *mul = double_int_one;
2551 return true;
2554 code = TREE_CODE (top);
2555 switch (code)
2557 case MULT_EXPR:
2558 mby = TREE_OPERAND (top, 1);
2559 if (TREE_CODE (mby) != INTEGER_CST)
2560 return false;
2562 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2563 return false;
2565 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2566 precision);
2567 return true;
2569 case PLUS_EXPR:
2570 case MINUS_EXPR:
2571 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2572 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2573 return false;
2575 if (code == MINUS_EXPR)
2576 p1 = double_int_neg (p1);
2577 *mul = double_int_sext (double_int_add (p0, p1), precision);
2578 return true;
2580 case INTEGER_CST:
2581 if (TREE_CODE (bot) != INTEGER_CST)
2582 return false;
2584 p0 = double_int_sext (tree_to_double_int (top), precision);
2585 p1 = double_int_sext (tree_to_double_int (bot), precision);
2586 if (double_int_zero_p (p1))
2587 return false;
2588 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2589 precision);
2590 return double_int_zero_p (res);
2592 default:
2593 return false;
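
/* A standalone sketch of the INTEGER_CST leaf case above, with int64_t
   standing in for double_int (const_multiple_of here is a hypothetical
   helper): TOP is a constant multiple of BOT iff the division leaves no
   remainder, in which case the quotient is the multiplier.  C's %
   truncates rather than floors, but the two agree on whether the
   remainder is zero.  */

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool
const_multiple_of (int64_t top, int64_t bot, int64_t *mul)
{
  if (bot == 0 || top % bot != 0)
    return false;
  *mul = top / bot;
  return true;
}

int
main (void)
{
  int64_t m;

  assert (const_multiple_of (12, 4, &m) && m == 3);
  assert (const_multiple_of (-8, 4, &m) && m == -2);
  assert (!const_multiple_of (13, 4, &m));
  return 0;
}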
2597 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2598 same precision that is at least as wide as the precision of TYPE, stores
2599 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2600 type of A and B. */
2602 static tree
2603 determine_common_wider_type (tree *a, tree *b)
2605 tree wider_type = NULL;
2606 tree suba, subb;
2607 tree atype = TREE_TYPE (*a);
2609 if ((TREE_CODE (*a) == NOP_EXPR
2610 || TREE_CODE (*a) == CONVERT_EXPR))
2612 suba = TREE_OPERAND (*a, 0);
2613 wider_type = TREE_TYPE (suba);
2614 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2615 return atype;
2617 else
2618 return atype;
2620 if ((TREE_CODE (*b) == NOP_EXPR
2621 || TREE_CODE (*b) == CONVERT_EXPR))
2623 subb = TREE_OPERAND (*b, 0);
2624 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2625 return atype;
2627 else
2628 return atype;
2630 *a = suba;
2631 *b = subb;
2632 return wider_type;
2635 /* Determines the expression by which USE is expressed from induction variable
2636 CAND at statement AT in LOOP. The expression is stored in a decomposed
2637 form into AFF. Returns false if USE cannot be expressed using CAND. */
2639 static bool
2640 get_computation_aff (struct loop *loop,
2641 struct iv_use *use, struct iv_cand *cand, tree at,
2642 struct affine_tree_combination *aff)
2644 tree ubase = use->iv->base;
2645 tree ustep = use->iv->step;
2646 tree cbase = cand->iv->base;
2647 tree cstep = cand->iv->step, cstep_common;
2648 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2649 tree common_type, var;
2650 tree uutype;
2651 aff_tree cbase_aff, var_aff;
2652 double_int rat;
2654 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2656 /* We do not have enough precision to express the values of USE. */
2657 return false;
2660 var = var_at_stmt (loop, cand, at);
2661 uutype = unsigned_type_for (utype);
2663 /* If the conversion is not a noop, perform it. */
2664 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2666 cstep = fold_convert (uutype, cstep);
2667 cbase = fold_convert (uutype, cbase);
2668 var = fold_convert (uutype, var);
2671 if (!constant_multiple_of (ustep, cstep, &rat))
2672 return false;
2674 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2675 type, we achieve better folding by computing their difference in this
2676 wider type, and casting the result to UUTYPE. We do not need to worry
2677 about overflows, as all the arithmetic will in the end be performed in
2678 UUTYPE anyway. */
2679 common_type = determine_common_wider_type (&ubase, &cbase);
2681 /* use = ubase - ratio * cbase + ratio * var. */
2682 tree_to_aff_combination (ubase, common_type, aff);
2683 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2684 tree_to_aff_combination (var, uutype, &var_aff);
2686 /* We need to shift the value if we are after the increment. */
2687 if (stmt_after_increment (loop, cand, at))
2689 aff_tree cstep_aff;
2691 if (common_type != uutype)
2692 cstep_common = fold_convert (common_type, cstep);
2693 else
2694 cstep_common = cstep;
2696 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2697 aff_combination_add (&cbase_aff, &cstep_aff);
2700 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2701 aff_combination_add (aff, &cbase_aff);
2702 if (common_type != uutype)
2703 aff_combination_convert (aff, uutype);
2705 aff_combination_scale (&var_aff, rat);
2706 aff_combination_add (aff, &var_aff);
2708 return true;
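
/* A standalone numeric check (hypothetical values, ignoring the
   after-increment adjustment) of the identity the function above builds
   in affine form: if the use runs as ubase + i * ustep and the candidate
   as var = cbase + i * cstep with ustep == ratio * cstep, then
   use == ubase - ratio * cbase + ratio * var for every iteration i.  */

#include <assert.h>

int
main (void)
{
  int ubase = 100, ustep = 8;          /* use:       100 + 8 * i */
  int cbase = 3, cstep = 4;            /* candidate:   3 + 4 * i */
  int ratio = ustep / cstep;           /* == 2 */
  int i;

  for (i = 0; i < 10; i++)
    {
      int var = cbase + i * cstep;     /* value of the candidate */
      int use = ubase + i * ustep;     /* value the use needs */

      assert (use == ubase - ratio * cbase + ratio * var);
    }
  return 0;
}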
2711 /* Determines the expression by which USE is expressed from induction variable
2712 CAND at statement AT in LOOP. The computation is unshared. */
2714 static tree
2715 get_computation_at (struct loop *loop,
2716 struct iv_use *use, struct iv_cand *cand, tree at)
2718 aff_tree aff;
2719 tree type = TREE_TYPE (use->iv->base);
2721 if (!get_computation_aff (loop, use, cand, at, &aff))
2722 return NULL_TREE;
2723 unshare_aff_combination (&aff);
2724 return fold_convert (type, aff_combination_to_tree (&aff));
2727 /* Determines the expression by which USE is expressed from induction variable
2728 CAND in LOOP. The computation is unshared. */
2730 static tree
2731 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2733 return get_computation_at (loop, use, cand, use->stmt);
2736 /* Returns cost of addition in MODE. */
2738 static unsigned
2739 add_cost (enum machine_mode mode)
2741 static unsigned costs[NUM_MACHINE_MODES];
2742 rtx seq;
2743 unsigned cost;
2745 if (costs[mode])
2746 return costs[mode];
2748 start_sequence ();
2749 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2750 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2751 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2752 NULL_RTX);
2753 seq = get_insns ();
2754 end_sequence ();
2756 cost = seq_cost (seq);
2757 if (!cost)
2758 cost = 1;
2760 costs[mode] = cost;
2762 if (dump_file && (dump_flags & TDF_DETAILS))
2763 fprintf (dump_file, "Addition in %s costs %d\n",
2764 GET_MODE_NAME (mode), cost);
2765 return cost;
2768 /* Entry in a hashtable of already known costs for multiplication. */
2769 struct mbc_entry
2771 HOST_WIDE_INT cst; /* The constant to multiply by. */
2772 enum machine_mode mode; /* In mode. */
2773 unsigned cost; /* The cost. */
2776 /* Computes the hash value for ENTRY. */
2778 static hashval_t
2779 mbc_entry_hash (const void *entry)
2781 const struct mbc_entry *e = entry;
2783 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2786 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2788 static int
2789 mbc_entry_eq (const void *entry1, const void *entry2)
2791 const struct mbc_entry *e1 = entry1;
2792 const struct mbc_entry *e2 = entry2;
2794 return (e1->mode == e2->mode
2795 && e1->cst == e2->cst);
2798 /* Returns cost of multiplication by constant CST in MODE. */
2800 unsigned
2801 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2803 static htab_t costs;
2804 struct mbc_entry **cached, act;
2805 rtx seq;
2806 unsigned cost;
2808 if (!costs)
2809 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2811 act.mode = mode;
2812 act.cst = cst;
2813 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2814 if (*cached)
2815 return (*cached)->cost;
2817 *cached = XNEW (struct mbc_entry);
2818 (*cached)->mode = mode;
2819 (*cached)->cst = cst;
2821 start_sequence ();
2822 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2823 gen_int_mode (cst, mode), NULL_RTX, 0);
2824 seq = get_insns ();
2825 end_sequence ();
2827 cost = seq_cost (seq);
2829 if (dump_file && (dump_flags & TDF_DETAILS))
2830 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2831 (int) cst, GET_MODE_NAME (mode), cost);
2833 (*cached)->cost = cost;
2835 return cost;
2838 /* Returns true if multiplying by RATIO is allowed in an address. Test the
2839 validity for a memory reference accessing memory of mode MODE. */
2841 bool
2842 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2844 #define MAX_RATIO 128
2845 static sbitmap valid_mult[MAX_MACHINE_MODE];
2847 if (!valid_mult[mode])
2849 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2850 rtx addr;
2851 HOST_WIDE_INT i;
2853 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2854 sbitmap_zero (valid_mult[mode]);
2855 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2856 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2858 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2859 if (memory_address_p (mode, addr))
2860 SET_BIT (valid_mult[mode], i + MAX_RATIO);
2863 if (dump_file && (dump_flags & TDF_DETAILS))
2865 fprintf (dump_file, " allowed multipliers:");
2866 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2867 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2868 fprintf (dump_file, " %d", (int) i);
2869 fprintf (dump_file, "\n");
2870 fprintf (dump_file, "\n");
2874 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2875 return false;
2877 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
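
/* A standalone sketch of the caching scheme above: validity of a signed
   ratio in [-MAX_RATIO, MAX_RATIO] is stored in a flat bit array indexed
   by ratio + MAX_RATIO.  Here target_allows is a hypothetical stand-in
   for the memory_address_p query.  */

#include <assert.h>
#include <stdbool.h>

#define EX_MAX_RATIO 128

static bool valid[2 * EX_MAX_RATIO + 1];

static bool
target_allows (long ratio)   /* stand-in for the real target query */
{
  return ratio == 1 || ratio == 2 || ratio == 4 || ratio == 8;
}

int
main (void)
{
  long i;

  for (i = -EX_MAX_RATIO; i <= EX_MAX_RATIO; i++)
    valid[i + EX_MAX_RATIO] = target_allows (i);

  assert (valid[4 + EX_MAX_RATIO]);
  assert (!valid[3 + EX_MAX_RATIO]);
  assert (!valid[-4 + EX_MAX_RATIO]);
  return 0;
}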
2880 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2881 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2882 variable is omitted. Compute the cost for a memory reference that accesses
2883 a memory location of mode MEM_MODE.
2885 TODO -- there must be some better way. This all is quite crude. */
2887 static unsigned
2888 get_address_cost (bool symbol_present, bool var_present,
2889 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2890 enum machine_mode mem_mode)
2892 static bool initialized[MAX_MACHINE_MODE];
2893 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2894 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2895 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2896 unsigned cost, acost;
2897 bool offset_p, ratio_p;
2898 HOST_WIDE_INT s_offset;
2899 unsigned HOST_WIDE_INT mask;
2900 unsigned bits;
2902 if (!initialized[mem_mode])
2904 HOST_WIDE_INT i;
2905 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2906 int old_cse_not_expected;
2907 unsigned sym_p, var_p, off_p, rat_p, add_c;
2908 rtx seq, addr, base;
2909 rtx reg0, reg1;
2911 initialized[mem_mode] = true;
2913 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2915 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2916 for (i = start; i <= 1 << 20; i <<= 1)
2918 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2919 if (!memory_address_p (mem_mode, addr))
2920 break;
2922 max_offset[mem_mode] = i == start ? 0 : i >> 1;
2923 off[mem_mode] = max_offset[mem_mode];
2925 for (i = start; i <= 1 << 20; i <<= 1)
2927 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2928 if (!memory_address_p (mem_mode, addr))
2929 break;
2931 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
2933 if (dump_file && (dump_flags & TDF_DETAILS))
2935 fprintf (dump_file, "get_address_cost:\n");
2936 fprintf (dump_file, " min offset %s %d\n",
2937 GET_MODE_NAME (mem_mode),
2938 (int) min_offset[mem_mode]);
2939 fprintf (dump_file, " max offset %s %d\n",
2940 GET_MODE_NAME (mem_mode),
2941 (int) max_offset[mem_mode]);
2944 rat[mem_mode] = 1;
2945 for (i = 2; i <= MAX_RATIO; i++)
2946 if (multiplier_allowed_in_address_p (i, mem_mode))
2948 rat[mem_mode] = i;
2949 break;
2952 /* Compute the cost of various addressing modes. */
2953 acost = 0;
2954 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2955 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
2957 for (i = 0; i < 16; i++)
2959 sym_p = i & 1;
2960 var_p = (i >> 1) & 1;
2961 off_p = (i >> 2) & 1;
2962 rat_p = (i >> 3) & 1;
2964 addr = reg0;
2965 if (rat_p)
2966 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
2967 gen_int_mode (rat[mem_mode], Pmode));
2969 if (var_p)
2970 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2972 if (sym_p)
2974 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2975 if (off_p)
2976 base = gen_rtx_fmt_e (CONST, Pmode,
2977 gen_rtx_fmt_ee (PLUS, Pmode,
2978 base,
2979 gen_int_mode (off[mem_mode],
2980 Pmode)));
2982 else if (off_p)
2983 base = gen_int_mode (off[mem_mode], Pmode);
2984 else
2985 base = NULL_RTX;
2987 if (base)
2988 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2990 start_sequence ();
2991 /* To avoid splitting addressing modes, pretend that no cse will
2992 follow. */
2993 old_cse_not_expected = cse_not_expected;
2994 cse_not_expected = true;
2995 addr = memory_address (mem_mode, addr);
2996 cse_not_expected = old_cse_not_expected;
2997 seq = get_insns ();
2998 end_sequence ();
3000 acost = seq_cost (seq);
3001 acost += address_cost (addr, mem_mode);
3003 if (!acost)
3004 acost = 1;
3005 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3008 /* On some targets, it is quite expensive to load a symbol to a register,
3009 which makes addresses that contain symbols look much more expensive.
3010 However, the symbol will have to be loaded in any case before the
3011 loop (and quite likely we have it in a register already), so it does not
3012 make much sense to penalize them too heavily. So make some final
3013 tweaks for the SYMBOL_PRESENT modes:
3015 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3016 var is cheaper, use this mode with small penalty.
3017 If VAR_PRESENT is true, try whether the mode with
3018 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3019 if this is the case, use it. */
3020 add_c = add_cost (Pmode);
3021 for (i = 0; i < 8; i++)
3023 var_p = i & 1;
3024 off_p = (i >> 1) & 1;
3025 rat_p = (i >> 2) & 1;
3027 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3028 if (var_p)
3029 acost += add_c;
3031 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3032 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3035 if (dump_file && (dump_flags & TDF_DETAILS))
3037 fprintf (dump_file, "Address costs:\n");
3039 for (i = 0; i < 16; i++)
3041 sym_p = i & 1;
3042 var_p = (i >> 1) & 1;
3043 off_p = (i >> 2) & 1;
3044 rat_p = (i >> 3) & 1;
3046 fprintf (dump_file, " ");
3047 if (sym_p)
3048 fprintf (dump_file, "sym + ");
3049 if (var_p)
3050 fprintf (dump_file, "var + ");
3051 if (off_p)
3052 fprintf (dump_file, "cst + ");
3053 if (rat_p)
3054 fprintf (dump_file, "rat * ");
3056 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3057 fprintf (dump_file, "index costs %d\n", acost);
3059 fprintf (dump_file, "\n");
3063 bits = GET_MODE_BITSIZE (Pmode);
3064 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3065 offset &= mask;
3066 if ((offset >> (bits - 1) & 1))
3067 offset |= ~mask;
3068 s_offset = offset;
3070 cost = 0;
3071 offset_p = (s_offset != 0
3072 && min_offset[mem_mode] <= s_offset
3073 && s_offset <= max_offset[mem_mode]);
3074 ratio_p = (ratio != 1
3075 && multiplier_allowed_in_address_p (ratio, mem_mode));
3077 if (ratio != 1 && !ratio_p)
3078 cost += multiply_by_cost (ratio, Pmode);
3080 if (s_offset && !offset_p && !symbol_present)
3081 cost += add_cost (Pmode);
3083 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3084 return cost + acost;
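
/* A standalone sketch of the enumeration used above: the sixteen
   combinations of {symbol, var, offset, ratio} address parts are walked
   by decoding the four low bits of a counter, exactly as the
   initialization and dump loops do.  */

#include <stdio.h>

int
main (void)
{
  unsigned i;

  for (i = 0; i < 16; i++)
    {
      unsigned sym_p = i & 1;
      unsigned var_p = (i >> 1) & 1;
      unsigned off_p = (i >> 2) & 1;
      unsigned rat_p = (i >> 3) & 1;

      printf ("%s%s%s%sindex\n",
              sym_p ? "sym + " : "",
              var_p ? "var + " : "",
              off_p ? "cst + " : "",
              rat_p ? "rat * " : "");
    }
  return 0;
}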
3087 /* Estimates cost of forcing expression EXPR into a variable. */
3089 unsigned
3090 force_expr_to_var_cost (tree expr)
3092 static bool costs_initialized = false;
3093 static unsigned integer_cost;
3094 static unsigned symbol_cost;
3095 static unsigned address_cost;
3096 tree op0, op1;
3097 unsigned cost0, cost1, cost;
3098 enum machine_mode mode;
3100 if (!costs_initialized)
3102 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3103 rtx x = gen_rtx_MEM (DECL_MODE (var),
3104 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3105 tree addr;
3106 tree type = build_pointer_type (integer_type_node);
3108 integer_cost = computation_cost (build_int_cst (integer_type_node,
3109 2000));
3111 SET_DECL_RTL (var, x);
3112 TREE_STATIC (var) = 1;
3113 addr = build1 (ADDR_EXPR, type, var);
3114 symbol_cost = computation_cost (addr) + 1;
3116 address_cost
3117 = computation_cost (build2 (PLUS_EXPR, type,
3118 addr,
3119 build_int_cst (type, 2000))) + 1;
3120 if (dump_file && (dump_flags & TDF_DETAILS))
3122 fprintf (dump_file, "force_expr_to_var_cost:\n");
3123 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3124 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3125 fprintf (dump_file, " address %d\n", (int) address_cost);
3126 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3127 fprintf (dump_file, "\n");
3130 costs_initialized = true;
3133 STRIP_NOPS (expr);
3135 if (SSA_VAR_P (expr))
3136 return 0;
3138 if (TREE_INVARIANT (expr))
3140 if (TREE_CODE (expr) == INTEGER_CST)
3141 return integer_cost;
3143 if (TREE_CODE (expr) == ADDR_EXPR)
3145 tree obj = TREE_OPERAND (expr, 0);
3147 if (TREE_CODE (obj) == VAR_DECL
3148 || TREE_CODE (obj) == PARM_DECL
3149 || TREE_CODE (obj) == RESULT_DECL)
3150 return symbol_cost;
3153 return address_cost;
3156 switch (TREE_CODE (expr))
3158 case PLUS_EXPR:
3159 case MINUS_EXPR:
3160 case MULT_EXPR:
3161 op0 = TREE_OPERAND (expr, 0);
3162 op1 = TREE_OPERAND (expr, 1);
3163 STRIP_NOPS (op0);
3164 STRIP_NOPS (op1);
3166 if (is_gimple_val (op0))
3167 cost0 = 0;
3168 else
3169 cost0 = force_expr_to_var_cost (op0);
3171 if (is_gimple_val (op1))
3172 cost1 = 0;
3173 else
3174 cost1 = force_expr_to_var_cost (op1);
3176 break;
3178 default:
3179 /* Just an arbitrary value, FIXME. */
3180 return target_spill_cost;
3183 mode = TYPE_MODE (TREE_TYPE (expr));
3184 switch (TREE_CODE (expr))
3186 case PLUS_EXPR:
3187 case MINUS_EXPR:
3188 cost = add_cost (mode);
3189 break;
3191 case MULT_EXPR:
3192 if (cst_and_fits_in_hwi (op0))
3193 cost = multiply_by_cost (int_cst_value (op0), mode);
3194 else if (cst_and_fits_in_hwi (op1))
3195 cost = multiply_by_cost (int_cst_value (op1), mode);
3196 else
3197 return target_spill_cost;
3198 break;
3200 default:
3201 gcc_unreachable ();
3204 cost += cost0;
3205 cost += cost1;
3207 /* Bound the cost by target_spill_cost. The parts of complicated
3208 computations are often either loop invariant or can at least
3209 be shared between several iv uses, so letting this grow without
3210 limit would not give reasonable results. */
3211 return cost < target_spill_cost ? cost : target_spill_cost;
3214 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3215 invariants the computation depends on. */
3217 static unsigned
3218 force_var_cost (struct ivopts_data *data,
3219 tree expr, bitmap *depends_on)
3221 if (depends_on)
3223 fd_ivopts_data = data;
3224 walk_tree (&expr, find_depends, depends_on, NULL);
3227 return force_expr_to_var_cost (expr);
3230 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3231 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3232 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3233 invariants the computation depends on. */
3235 static unsigned
3236 split_address_cost (struct ivopts_data *data,
3237 tree addr, bool *symbol_present, bool *var_present,
3238 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3240 tree core;
3241 HOST_WIDE_INT bitsize;
3242 HOST_WIDE_INT bitpos;
3243 tree toffset;
3244 enum machine_mode mode;
3245 int unsignedp, volatilep;
3247 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3248 &unsignedp, &volatilep, false);
3250 if (toffset != 0
3251 || bitpos % BITS_PER_UNIT != 0
3252 || TREE_CODE (core) != VAR_DECL)
3254 *symbol_present = false;
3255 *var_present = true;
3256 fd_ivopts_data = data;
3257 walk_tree (&addr, find_depends, depends_on, NULL);
3258 return target_spill_cost;
3261 *offset += bitpos / BITS_PER_UNIT;
3262 if (TREE_STATIC (core)
3263 || DECL_EXTERNAL (core))
3265 *symbol_present = true;
3266 *var_present = false;
3267 return 0;
3270 *symbol_present = false;
3271 *var_present = true;
3272 return 0;
3275 /* Estimates cost of expressing difference of addresses E1 - E2 as
3276 var + symbol + offset. The value of offset is added to OFFSET,
3277 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3278 part is missing. DEPENDS_ON is a set of the invariants the computation
3279 depends on. */
3281 static unsigned
3282 ptr_difference_cost (struct ivopts_data *data,
3283 tree e1, tree e2, bool *symbol_present, bool *var_present,
3284 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3286 HOST_WIDE_INT diff = 0;
3287 unsigned cost;
3289 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3291 if (ptr_difference_const (e1, e2, &diff))
3293 *offset += diff;
3294 *symbol_present = false;
3295 *var_present = false;
3296 return 0;
3299 if (e2 == integer_zero_node)
3300 return split_address_cost (data, TREE_OPERAND (e1, 0),
3301 symbol_present, var_present, offset, depends_on);
3303 *symbol_present = false;
3304 *var_present = true;
3306 cost = force_var_cost (data, e1, depends_on);
3307 cost += force_var_cost (data, e2, depends_on);
3308 cost += add_cost (Pmode);
3310 return cost;
3313 /* Estimates cost of expressing difference E1 - E2 as
3314 var + symbol + offset. The value of offset is added to OFFSET,
3315 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3316 part is missing. DEPENDS_ON is a set of the invariants the computation
3317 depends on. */
3319 static unsigned
3320 difference_cost (struct ivopts_data *data,
3321 tree e1, tree e2, bool *symbol_present, bool *var_present,
3322 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3324 unsigned cost;
3325 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3326 unsigned HOST_WIDE_INT off1, off2;
3328 e1 = strip_offset (e1, &off1);
3329 e2 = strip_offset (e2, &off2);
3330 *offset += off1 - off2;
3332 STRIP_NOPS (e1);
3333 STRIP_NOPS (e2);
3335 if (TREE_CODE (e1) == ADDR_EXPR)
3336 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3337 depends_on);
3338 *symbol_present = false;
3340 if (operand_equal_p (e1, e2, 0))
3342 *var_present = false;
3343 return 0;
3345 *var_present = true;
3346 if (integer_zerop (e2))
3347 return force_var_cost (data, e1, depends_on);
3349 if (integer_zerop (e1))
3351 cost = force_var_cost (data, e2, depends_on);
3352 cost += multiply_by_cost (-1, mode);
3354 return cost;
3357 cost = force_var_cost (data, e1, depends_on);
3358 cost += force_var_cost (data, e2, depends_on);
3359 cost += add_cost (mode);
3361 return cost;
3364 /* Determines the cost of the computation by which USE is expressed
3365 from induction variable CAND. If ADDRESS_P is true, we just need
3366 to create an address from it, otherwise we want to get it into a
3367 register. A set of invariants we depend on is stored in
3368 DEPENDS_ON. AT is the statement at which the value is computed. */
3370 static unsigned
3371 get_computation_cost_at (struct ivopts_data *data,
3372 struct iv_use *use, struct iv_cand *cand,
3373 bool address_p, bitmap *depends_on, tree at)
3375 tree ubase = use->iv->base, ustep = use->iv->step;
3376 tree cbase, cstep;
3377 tree utype = TREE_TYPE (ubase), ctype;
3378 unsigned HOST_WIDE_INT cstepi, offset = 0;
3379 HOST_WIDE_INT ratio, aratio;
3380 bool var_present, symbol_present;
3381 unsigned cost = 0, n_sums;
3382 double_int rat;
3384 *depends_on = NULL;
3386 /* Only consider real candidates. */
3387 if (!cand->iv)
3388 return INFTY;
3390 cbase = cand->iv->base;
3391 cstep = cand->iv->step;
3392 ctype = TREE_TYPE (cbase);
3394 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3396 /* We do not have enough precision to express the values of USE. */
3397 return INFTY;
3400 if (address_p)
3402 /* Do not try to express the address of an object with a computation
3403 based on the address of a different object. This may cause problems
3404 in rtl level alias analysis (which does not expect this to happen,
3405 as it is illegal in C), and would be unlikely to be useful
3406 anyway. */
3407 if (use->iv->base_object
3408 && cand->iv->base_object
3409 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3410 return INFTY;
3413 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3415 /* TODO -- add direct handling of this case. */
3416 goto fallback;
3419 /* CSTEPI is removed from the offset in case the statement is after the
3420 increment. If the step is not constant, we use zero instead.
3421 This is a bit imprecise (there is the extra addition), but
3422 redundancy elimination is likely to transform the code so that
3423 it uses the value of the variable before the increment anyway,
3424 so this is not all that unrealistic. */
3425 if (cst_and_fits_in_hwi (cstep))
3426 cstepi = int_cst_value (cstep);
3427 else
3428 cstepi = 0;
3430 if (!constant_multiple_of (ustep, cstep, &rat))
3431 return INFTY;
3433 if (double_int_fits_in_shwi_p (rat))
3434 ratio = double_int_to_shwi (rat);
3435 else
3436 return INFTY;
3438 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3439 or ratio == 1, it is better to handle this like
3441 ubase - ratio * cbase + ratio * var
3443 (this also holds in the case ratio == -1, TODO). */
3445 if (cst_and_fits_in_hwi (cbase))
3447 offset = - ratio * int_cst_value (cbase);
3448 cost += difference_cost (data,
3449 ubase, integer_zero_node,
3450 &symbol_present, &var_present, &offset,
3451 depends_on);
3453 else if (ratio == 1)
3455 cost += difference_cost (data,
3456 ubase, cbase,
3457 &symbol_present, &var_present, &offset,
3458 depends_on);
3460 else
3462 cost += force_var_cost (data, cbase, depends_on);
3463 cost += add_cost (TYPE_MODE (ctype));
3464 cost += difference_cost (data,
3465 ubase, integer_zero_node,
3466 &symbol_present, &var_present, &offset,
3467 depends_on);
3470 /* If we are after the increment, the value of the candidate is higher by
3471 one iteration. */
3472 if (stmt_after_increment (data->current_loop, cand, at))
3473 offset -= ratio * cstepi;
3475 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3476 (symbol/var/const parts may be omitted). If we are looking for an address,
3477 find the cost of addressing this. */
3478 if (address_p)
3479 return cost + get_address_cost (symbol_present, var_present, offset, ratio,
3480 TYPE_MODE (TREE_TYPE (*use->op_p)));
3482 /* Otherwise estimate the costs for computing the expression. */
3483 aratio = ratio > 0 ? ratio : -ratio;
3484 if (!symbol_present && !var_present && !offset)
3486 if (ratio != 1)
3487 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3489 return cost;
3492 if (aratio != 1)
3493 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3495 n_sums = 1;
3496 if (var_present
3497 /* Symbol + offset should be compile-time computable. */
3498 && (symbol_present || offset))
3499 n_sums++;
3501 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3503 fallback:
3505 /* Just get the expression, expand it and measure the cost. */
3506 tree comp = get_computation_at (data->current_loop, use, cand, at);
3508 if (!comp)
3509 return INFTY;
3511 if (address_p)
3512 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3514 return computation_cost (comp);
3518 /* Determines the cost of the computation by which USE is expressed
3519 from induction variable CAND. If ADDRESS_P is true, we just need
3520 to create an address from it, otherwise we want to get it into a
3521 register. A set of invariants we depend on is stored in
3522 DEPENDS_ON. */
3524 static unsigned
3525 get_computation_cost (struct ivopts_data *data,
3526 struct iv_use *use, struct iv_cand *cand,
3527 bool address_p, bitmap *depends_on)
3529 return get_computation_cost_at (data,
3530 use, cand, address_p, depends_on, use->stmt);
3533 /* Determines cost of basing replacement of USE on CAND in a generic
3534 expression. */
3536 static bool
3537 determine_use_iv_cost_generic (struct ivopts_data *data,
3538 struct iv_use *use, struct iv_cand *cand)
3540 bitmap depends_on;
3541 unsigned cost;
3543 /* The simple case first -- if we need to express the value of the preserved
3544 original biv, the cost is 0. This also prevents us from counting the
3545 cost of the increment twice -- once at this use and once in the cost of
3546 the candidate. */
3547 if (cand->pos == IP_ORIGINAL
3548 && cand->incremented_at == use->stmt)
3550 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3551 return true;
3554 cost = get_computation_cost (data, use, cand, false, &depends_on);
3555 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3557 return cost != INFTY;
3560 /* Determines cost of basing replacement of USE on CAND in an address. */
3562 static bool
3563 determine_use_iv_cost_address (struct ivopts_data *data,
3564 struct iv_use *use, struct iv_cand *cand)
3566 bitmap depends_on;
3567 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3569 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3571 return cost != INFTY;
3574 /* Computes value of candidate CAND at position AT in iteration NITER, and
3575 stores it to VAL. */
3577 static void
3578 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3579 aff_tree *val)
3581 aff_tree step, delta, nit;
3582 struct iv *iv = cand->iv;
3583 tree type = TREE_TYPE (iv->base);
3585 tree_to_aff_combination (iv->step, type, &step);
3586 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3587 aff_combination_convert (&nit, type);
3588 aff_combination_mult (&nit, &step, &delta);
3589 if (stmt_after_increment (loop, cand, at))
3590 aff_combination_add (&delta, &step);
3592 tree_to_aff_combination (iv->base, type, val);
3593 aff_combination_add (val, &delta);
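
/* A standalone numeric check (hypothetical values) of what the affine
   computation above produces: the candidate's value after NITER
   iterations is base + niter * step, plus one extra step if the
   position is after the increment.  */

#include <assert.h>

int
main (void)
{
  int base = 7, step = 3, niter = 10;
  int before = base + niter * step;        /* value before the increment */
  int after = base + (niter + 1) * step;   /* value after the increment */

  assert (before == 37 && after == 40);
  return 0;
}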
3596 /* Returns the period of the induction variable IV. */
3598 static tree
3599 iv_period (struct iv *iv)
3601 tree step = iv->step, period, type;
3602 tree pow2div;
3604 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3606 /* The period of the iv is gcd (step, type range). Since the type range
3607 is a power of two, it suffices to determine the maximum power of two
3608 that divides the step. */
3609 pow2div = num_ending_zeros (step);
3610 type = unsigned_type_for (TREE_TYPE (step));
3612 period = build_low_bits_mask (type,
3613 (TYPE_PRECISION (type)
3614 - tree_low_cst (pow2div, 1)));
3616 return period;
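
/* A standalone sketch of the period computation above for a 32-bit
   unsigned iv (period_u32 is a hypothetical helper): with Z trailing
   zero bits in the step, the iv takes 2^(32 - Z) distinct values before
   it wraps, so the mask 2^(32 - Z) - 1 is the largest iteration count
   for which comparing against a computed bound is safe.  */

#include <assert.h>
#include <stdint.h>

static uint32_t
period_u32 (uint32_t step)
{
  unsigned zeros = 0;

  assert (step != 0);
  while (!(step & 1))
    {
      step >>= 1;
      zeros++;
    }
  /* Mask of the low (32 - zeros) bits, avoiding the 1 << 32 overflow.  */
  return zeros == 0 ? UINT32_MAX : (UINT32_C (1) << (32 - zeros)) - 1;
}

int
main (void)
{
  assert (period_u32 (1) == UINT32_MAX);                /* full range */
  assert (period_u32 (4) == (UINT32_C (1) << 30) - 1);  /* 2^30 - 1 */
  return 0;
}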
3619 /* Returns the comparison operator used when eliminating the iv USE. */
3621 static enum tree_code
3622 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3624 struct loop *loop = data->current_loop;
3625 basic_block ex_bb;
3626 edge exit;
3628 ex_bb = bb_for_stmt (use->stmt);
3629 exit = EDGE_SUCC (ex_bb, 0);
3630 if (flow_bb_inside_loop_p (loop, exit->dest))
3631 exit = EDGE_SUCC (ex_bb, 1);
3633 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3636 /* Check whether it is possible to express the condition in USE by comparison
3637 of candidate CAND. If so, store the value compared with to BOUND. */
3639 static bool
3640 may_eliminate_iv (struct ivopts_data *data,
3641 struct iv_use *use, struct iv_cand *cand, tree *bound)
3643 basic_block ex_bb;
3644 edge exit;
3645 tree nit, nit_type;
3646 tree wider_type, period, per_type;
3647 struct loop *loop = data->current_loop;
3648 aff_tree bnd;
3650 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3651 return false;
3653 /* For now this works only for exits that dominate the loop latch. TODO --
3654 extend it to other conditions inside the loop body. */
3655 ex_bb = bb_for_stmt (use->stmt);
3656 if (use->stmt != last_stmt (ex_bb)
3657 || TREE_CODE (use->stmt) != COND_EXPR)
3658 return false;
3659 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3660 return false;
3662 exit = EDGE_SUCC (ex_bb, 0);
3663 if (flow_bb_inside_loop_p (loop, exit->dest))
3664 exit = EDGE_SUCC (ex_bb, 1);
3665 if (flow_bb_inside_loop_p (loop, exit->dest))
3666 return false;
3668 nit = niter_for_exit (data, exit);
3669 if (!nit)
3670 return false;
3672 nit_type = TREE_TYPE (nit);
3674 /* Determine whether we may use the variable to test whether niter iterations
3675 elapsed. This is the case iff the period of the induction variable is
3676 greater than the number of iterations. */
3677 period = iv_period (cand->iv);
3678 if (!period)
3679 return false;
3680 per_type = TREE_TYPE (period);
3682 wider_type = TREE_TYPE (period);
3683 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3684 wider_type = per_type;
3685 else
3686 wider_type = nit_type;
3688 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
3689 fold_convert (wider_type, period),
3690 fold_convert (wider_type, nit))))
3691 return false;
3693 cand_value_at (loop, cand, use->stmt, nit, &bnd);
3694 *bound = aff_combination_to_tree (&bnd);
3695 return true;
3698 /* Determines cost of basing replacement of USE on CAND in a condition. */
3700 static bool
3701 determine_use_iv_cost_condition (struct ivopts_data *data,
3702 struct iv_use *use, struct iv_cand *cand)
3704 tree bound = NULL_TREE;
3705 struct iv *cmp_iv;
3706 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3707 unsigned elim_cost, express_cost, cost;
3708 bool ok;
3710 /* Only consider real candidates. */
3711 if (!cand->iv)
3713 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
3714 return false;
3717 /* Try iv elimination. */
3718 if (may_eliminate_iv (data, use, cand, &bound))
3719 elim_cost = force_var_cost (data, bound, &depends_on_elim);
3720 else
3721 elim_cost = INFTY;
3723 /* Try expressing the original giv. If it is compared with an invariant,
3724 note that we cannot get rid of it. */
3725 ok = extract_cond_operands (data, use->op_p, NULL, NULL, NULL, &cmp_iv);
3726 gcc_assert (ok);
3728 express_cost = get_computation_cost (data, use, cand, false,
3729 &depends_on_express);
3730 fd_ivopts_data = data;
3731 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3733 /* Choose the better approach. */
3734 if (elim_cost < express_cost)
3736 cost = elim_cost;
3737 depends_on = depends_on_elim;
3738 depends_on_elim = NULL;
3740 else
3742 cost = express_cost;
3743 depends_on = depends_on_express;
3744 depends_on_express = NULL;
3745 bound = NULL_TREE;
3748 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3750 if (depends_on_elim)
3751 BITMAP_FREE (depends_on_elim);
3752 if (depends_on_express)
3753 BITMAP_FREE (depends_on_express);
3755 return cost != INFTY;
3758 /* Determines cost of basing replacement of USE on CAND. Returns false
3759 if USE cannot be based on CAND. */
3761 static bool
3762 determine_use_iv_cost (struct ivopts_data *data,
3763 struct iv_use *use, struct iv_cand *cand)
3765 switch (use->type)
3767 case USE_NONLINEAR_EXPR:
3768 return determine_use_iv_cost_generic (data, use, cand);
3770 case USE_ADDRESS:
3771 return determine_use_iv_cost_address (data, use, cand);
3773 case USE_COMPARE:
3774 return determine_use_iv_cost_condition (data, use, cand);
3776 default:
3777 gcc_unreachable ();
3781 /* Determines costs of basing the use of the iv on an iv candidate. */
3783 static void
3784 determine_use_iv_costs (struct ivopts_data *data)
3786 unsigned i, j;
3787 struct iv_use *use;
3788 struct iv_cand *cand;
3789 bitmap to_clear = BITMAP_ALLOC (NULL);
3791 alloc_use_cost_map (data);
3793 for (i = 0; i < n_iv_uses (data); i++)
3795 use = iv_use (data, i);
3797 if (data->consider_all_candidates)
3799 for (j = 0; j < n_iv_cands (data); j++)
3801 cand = iv_cand (data, j);
3802 determine_use_iv_cost (data, use, cand);
3805 else
3807 bitmap_iterator bi;
3809 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3811 cand = iv_cand (data, j);
3812 if (!determine_use_iv_cost (data, use, cand))
3813 bitmap_set_bit (to_clear, j);
3816 /* Remove the candidates for which the cost is infinite from
3817 the list of related candidates. */
3818 bitmap_and_compl_into (use->related_cands, to_clear);
3819 bitmap_clear (to_clear);
3823 BITMAP_FREE (to_clear);
3825 if (dump_file && (dump_flags & TDF_DETAILS))
3827 fprintf (dump_file, "Use-candidate costs:\n");
3829 for (i = 0; i < n_iv_uses (data); i++)
3831 use = iv_use (data, i);
3833 fprintf (dump_file, "Use %d:\n", i);
3834 fprintf (dump_file, " cand\tcost\tdepends on\n");
3835 for (j = 0; j < use->n_map_members; j++)
3837 if (!use->cost_map[j].cand
3838 || use->cost_map[j].cost == INFTY)
3839 continue;
3841 fprintf (dump_file, " %d\t%d\t",
3842 use->cost_map[j].cand->id,
3843 use->cost_map[j].cost);
3844 if (use->cost_map[j].depends_on)
3845 bitmap_print (dump_file,
3846 use->cost_map[j].depends_on, "","");
3847 fprintf (dump_file, "\n");
3850 fprintf (dump_file, "\n");
3852 fprintf (dump_file, "\n");
3856 /* Determines cost of the candidate CAND. */
3858 static void
3859 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3861 unsigned cost_base, cost_step;
3862 tree base;
3864 if (!cand->iv)
3866 cand->cost = 0;
3867 return;
3870 /* There are two costs associated with the candidate -- its increment
3871 and its initialization. The second is almost negligible for any loop
3872 that rolls enough, so we take it into account only a little. */
3874 base = cand->iv->base;
3875 cost_base = force_var_cost (data, base, NULL);
3876 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3878 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3880 /* Prefer the original iv unless we may gain something by replacing it;
3881 this is not really relevant for artificial ivs created by other
3882 passes. */
3883 if (cand->pos == IP_ORIGINAL
3884 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3885 cand->cost--;
3887 /* Prefer not to insert statements into latch unless there are some
3888 already (so that we do not create unnecessary jumps). */
3889 if (cand->pos == IP_END
3890 && empty_block_p (ip_end_pos (data->current_loop)))
3891 cand->cost++;
3894 /* Determines costs of computation of the candidates. */
3896 static void
3897 determine_iv_costs (struct ivopts_data *data)
3899 unsigned i;
3901 if (dump_file && (dump_flags & TDF_DETAILS))
3903 fprintf (dump_file, "Candidate costs:\n");
3904 fprintf (dump_file, " cand\tcost\n");
3907 for (i = 0; i < n_iv_cands (data); i++)
3909 struct iv_cand *cand = iv_cand (data, i);
3911 determine_iv_cost (data, cand);
3913 if (dump_file && (dump_flags & TDF_DETAILS))
3914 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3917 if (dump_file && (dump_flags & TDF_DETAILS))
3918 fprintf (dump_file, "\n");
3921 /* Calculates cost for having SIZE induction variables. */
3923 static unsigned
3924 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3926 return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
3929 /* For each size of the induction variable set determine the penalty. */
3931 static void
3932 determine_set_costs (struct ivopts_data *data)
3934 unsigned j, n;
3935 tree phi, op;
3936 struct loop *loop = data->current_loop;
3937 bitmap_iterator bi;
3939 /* We use the following model (definitely improvable, especially the
3940 cost function -- TODO):
3942 We estimate the number of registers available (using MD data), name it A.
3944 We estimate the number of registers used by the loop, name it U. This
3945 number is obtained as the number of loop phi nodes (not counting virtual
3946 registers and bivs) + the number of variables from outside of the loop.
3948 We set a reserve R (free regs that are used for temporary computations,
3949 etc.). For now the reserve is a constant 3.
3951 Let I be the number of induction variables.
3953 -- if U + I + R <= A, the cost is I * SMALL_COST (just so as not to
3954 encourage making a lot of ivs without a reason).
3955 -- if A - R < U + I <= A, the cost is I * PRES_COST
3956 -- if U + I > A, the cost is I * PRES_COST and
3957 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3959 if (dump_file && (dump_flags & TDF_DETAILS))
3961 fprintf (dump_file, "Global costs:\n");
3962 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3963 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3964 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3965 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3968 n = 0;
3969 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3971 op = PHI_RESULT (phi);
3973 if (!is_gimple_reg (op))
3974 continue;
3976 if (get_iv (data, op))
3977 continue;
3979 n++;
3982 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3984 struct version_info *info = ver_info (data, j);
3986 if (info->inv_id && info->has_nonlin_use)
3987 n++;
3990 data->regs_used = n;
3991 if (dump_file && (dump_flags & TDF_DETAILS))
3992 fprintf (dump_file, " regs_used %d\n", n);
3994 if (dump_file && (dump_flags & TDF_DETAILS))
3996 fprintf (dump_file, " cost for size:\n");
3997 fprintf (dump_file, " ivs\tcost\n");
3998 for (j = 0; j <= 2 * target_avail_regs; j++)
3999 fprintf (dump_file, " %d\t%d\n", j,
4000 ivopts_global_cost_for_size (data, j));
4001 fprintf (dump_file, "\n");
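
/* A standalone sketch of the piecewise cost model described in the
   comment above, with hypothetical constants standing in for the target
   parameters (A = 16 available registers, R = 3 reserved).  */

#include <stdio.h>

#define EX_AVAIL 16
#define EX_RESERVE 3
#define EX_SMALL_COST 1
#define EX_PRES_COST 2
#define EX_SPILL_COST 5

static unsigned
global_cost (unsigned n_uses, unsigned u, unsigned i)
{
  if (u + i + EX_RESERVE <= EX_AVAIL)
    return i * EX_SMALL_COST;
  if (u + i <= EX_AVAIL)
    return i * EX_PRES_COST;
  return i * EX_PRES_COST
         + n_uses * EX_SPILL_COST * (u + i - EX_AVAIL) / (u + i);
}

int
main (void)
{
  /* With 10 uses and 8 registers already in use, adding more ivs gets
     progressively more expensive.  */
  printf ("%u %u %u\n",
          global_cost (10, 8, 2),    /* fits with reserve: 2 */
          global_cost (10, 8, 7),    /* reserve exhausted: 14 */
          global_cost (10, 8, 12));  /* spilling: 24 + 10 = 34 */
  return 0;
}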
4005 /* Returns true if A is a cheaper cost pair than B. */
4007 static bool
4008 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4010 if (!a)
4011 return false;
4013 if (!b)
4014 return true;
4016 if (a->cost < b->cost)
4017 return true;
4019 if (a->cost > b->cost)
4020 return false;
4022 /* In case the costs are the same, prefer the cheaper candidate. */
4023 if (a->cand->cost < b->cand->cost)
4024 return true;
4026 return false;
4029 /* Computes the cost field of IVS structure. */
4031 static void
4032 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4034 unsigned cost = 0;
4036 cost += ivs->cand_use_cost;
4037 cost += ivs->cand_cost;
4038 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4040 ivs->cost = cost;
4043 /* Removes the invariants in set INVS from set IVS. */
4045 static void
4046 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4048 bitmap_iterator bi;
4049 unsigned iid;
4051 if (!invs)
4052 return;
4054 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4056 ivs->n_invariant_uses[iid]--;
4057 if (ivs->n_invariant_uses[iid] == 0)
4058 ivs->n_regs--;
4062 /* Set USE not to be expressed by any candidate in IVS. */
4064 static void
4065 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4066 struct iv_use *use)
4068 unsigned uid = use->id, cid;
4069 struct cost_pair *cp;
4071 cp = ivs->cand_for_use[uid];
4072 if (!cp)
4073 return;
4074 cid = cp->cand->id;
4076 ivs->bad_uses++;
4077 ivs->cand_for_use[uid] = NULL;
4078 ivs->n_cand_uses[cid]--;
4080 if (ivs->n_cand_uses[cid] == 0)
4082 bitmap_clear_bit (ivs->cands, cid);
4083 /* Do not count the pseudocandidates. */
4084 if (cp->cand->iv)
4085 ivs->n_regs--;
4086 ivs->n_cands--;
4087 ivs->cand_cost -= cp->cand->cost;
4089 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4092 ivs->cand_use_cost -= cp->cost;
4094 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4095 iv_ca_recount_cost (data, ivs);
4098 /* Adds the invariants in set INVS to set IVS. */
4100 static void
4101 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4103 bitmap_iterator bi;
4104 unsigned iid;
4106 if (!invs)
4107 return;
4109 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4111 ivs->n_invariant_uses[iid]++;
4112 if (ivs->n_invariant_uses[iid] == 1)
4113 ivs->n_regs++;
4117 /* Set cost pair for USE in set IVS to CP. */
4119 static void
4120 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4121 struct iv_use *use, struct cost_pair *cp)
4123 unsigned uid = use->id, cid;
4125 if (ivs->cand_for_use[uid] == cp)
4126 return;
4128 if (ivs->cand_for_use[uid])
4129 iv_ca_set_no_cp (data, ivs, use);
4131 if (cp)
4133 cid = cp->cand->id;
4135 ivs->bad_uses--;
4136 ivs->cand_for_use[uid] = cp;
4137 ivs->n_cand_uses[cid]++;
4138 if (ivs->n_cand_uses[cid] == 1)
4140 bitmap_set_bit (ivs->cands, cid);
4141 /* Do not count the pseudocandidates. */
4142 if (cp->cand->iv)
4143 ivs->n_regs++;
4144 ivs->n_cands++;
4145 ivs->cand_cost += cp->cand->cost;
4147 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4150 ivs->cand_use_cost += cp->cost;
4151 iv_ca_set_add_invariants (ivs, cp->depends_on);
4152 iv_ca_recount_cost (data, ivs);
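
/* A standalone sketch (hypothetical names) of the bookkeeping used by
   iv_ca_set_no_cp and iv_ca_set_cp above: n_regs is maintained by
   reference counting, so a register is charged when its use count goes
   from 0 to 1 and released when it drops back to 0.  */

#include <assert.h>

static unsigned n_regs;
static unsigned refcount[8];

static void
acquire (unsigned id)
{
  if (refcount[id]++ == 0)
    n_regs++;
}

static void
release (unsigned id)
{
  assert (refcount[id] > 0);
  if (--refcount[id] == 0)
    n_regs--;
}

int
main (void)
{
  acquire (3);
  acquire (3);
  acquire (5);
  assert (n_regs == 2);    /* two distinct ids are in use */
  release (3);
  assert (n_regs == 2);    /* id 3 is still referenced once */
  release (3);
  release (5);
  assert (n_regs == 0);
  return 0;
}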
4156 /* Extend set IVS by expressing USE by some of the candidates in it
4157 if possible. */
4159 static void
4160 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4161 struct iv_use *use)
4163 struct cost_pair *best_cp = NULL, *cp;
4164 bitmap_iterator bi;
4165 unsigned i;
4167 gcc_assert (ivs->upto >= use->id);
4169 if (ivs->upto == use->id)
4171 ivs->upto++;
4172 ivs->bad_uses++;
4175 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4177 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4179 if (cheaper_cost_pair (cp, best_cp))
4180 best_cp = cp;
4183 iv_ca_set_cp (data, ivs, use, best_cp);
4186 /* Get cost for assignment IVS. */
4188 static unsigned
4189 iv_ca_cost (struct iv_ca *ivs)
4191 return (ivs->bad_uses ? INFTY : ivs->cost);
4194 /* Returns true if all dependences of CP are among invariants in IVS. */
4196 static bool
4197 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4199 unsigned i;
4200 bitmap_iterator bi;
4202 if (!cp->depends_on)
4203 return true;
4205 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4207 if (ivs->n_invariant_uses[i] == 0)
4208 return false;
4211 return true;
4214 /* Creates a change expressing USE by NEW_CP instead of OLD_CP, and chains
4215 it before NEXT_CHANGE. */
4217 static struct iv_ca_delta *
4218 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4219 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4221 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4223 change->use = use;
4224 change->old_cp = old_cp;
4225 change->new_cp = new_cp;
4226 change->next_change = next_change;
4228 return change;
4231 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4232 are rewritten. */
4234 static struct iv_ca_delta *
4235 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4237 struct iv_ca_delta *last;
4239 if (!l2)
4240 return l1;
4242 if (!l1)
4243 return l2;
4245 for (last = l1; last->next_change; last = last->next_change)
4246 continue;
4247 last->next_change = l2;
4249 return l1;
4252 /* Returns the cost pair by which USE is expressed in IVS. */
4254 static struct cost_pair *
4255 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4257 return ivs->cand_for_use[use->id];
4260 /* Reverse the list of changes DELTA, forming the inverse to it. */
4262 static struct iv_ca_delta *
4263 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4265 struct iv_ca_delta *act, *next, *prev = NULL;
4266 struct cost_pair *tmp;
4268 for (act = delta; act; act = next)
4270 next = act->next_change;
4271 act->next_change = prev;
4272 prev = act;
4274 tmp = act->old_cp;
4275 act->old_cp = act->new_cp;
4276 act->new_cp = tmp;
4279 return prev;
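
/* A standalone sketch of the reversal above: the classic in-place singly
   linked list reversal, with each node's old/new fields swapped so that
   replaying the reversed list undoes the original changes (hypothetical
   struct delta with int payloads in place of cost pairs).  */

#include <assert.h>
#include <stddef.h>

struct delta { int old_v, new_v; struct delta *next; };

static struct delta *
reverse (struct delta *head)
{
  struct delta *act, *next, *prev = NULL;
  int tmp;

  for (act = head; act; act = next)
    {
      next = act->next;
      act->next = prev;
      prev = act;

      tmp = act->old_v;          /* Swapping old/new turns each change */
      act->old_v = act->new_v;   /* into its own inverse.  */
      act->new_v = tmp;
    }
  return prev;
}

int
main (void)
{
  struct delta b = { 2, 3, NULL };
  struct delta a = { 1, 2, &b };
  struct delta *r = reverse (&a);

  assert (r == &b && r->next == &a && a.next == NULL);
  assert (r->old_v == 3 && r->new_v == 2);
  return 0;
}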
4282 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4283 reverted instead. */
4285 static void
4286 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4287 struct iv_ca_delta *delta, bool forward)
4289 struct cost_pair *from, *to;
4290 struct iv_ca_delta *act;
4292 if (!forward)
4293 delta = iv_ca_delta_reverse (delta);
4295 for (act = delta; act; act = act->next_change)
4297 from = act->old_cp;
4298 to = act->new_cp;
4299 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4300 iv_ca_set_cp (data, ivs, act->use, to);
4303 if (!forward)
4304 iv_ca_delta_reverse (delta);
4307 /* Returns true if CAND is used in IVS. */
4309 static bool
4310 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4312 return ivs->n_cand_uses[cand->id] > 0;
4315 /* Returns number of induction variable candidates in the set IVS. */
4317 static unsigned
4318 iv_ca_n_cands (struct iv_ca *ivs)
4320 return ivs->n_cands;
4323 /* Free the list of changes DELTA. */
4325 static void
4326 iv_ca_delta_free (struct iv_ca_delta **delta)
4328 struct iv_ca_delta *act, *next;
4330 for (act = *delta; act; act = next)
4332 next = act->next_change;
4333 free (act);
4336 *delta = NULL;
4339 /* Allocates a new iv candidate assignment. */
4341 static struct iv_ca *
4342 iv_ca_new (struct ivopts_data *data)
4344 struct iv_ca *nw = XNEW (struct iv_ca);
4346 nw->upto = 0;
4347 nw->bad_uses = 0;
4348 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4349 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4350 nw->cands = BITMAP_ALLOC (NULL);
4351 nw->n_cands = 0;
4352 nw->n_regs = 0;
4353 nw->cand_use_cost = 0;
4354 nw->cand_cost = 0;
4355 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4356 nw->cost = 0;
4358 return nw;
4361 /* Free memory occupied by the set IVS. */
4363 static void
4364 iv_ca_free (struct iv_ca **ivs)
4366 free ((*ivs)->cand_for_use);
4367 free ((*ivs)->n_cand_uses);
4368 BITMAP_FREE ((*ivs)->cands);
4369 free ((*ivs)->n_invariant_uses);
4370 free (*ivs);
4371 *ivs = NULL;
4374 /* Dumps IVS to FILE. */
4376 static void
4377 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4379 const char *pref = " invariants ";
4380 unsigned i;
4382 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4383 bitmap_print (file, ivs->cands, " candidates ","\n");
4385 for (i = 1; i <= data->max_inv_id; i++)
4386 if (ivs->n_invariant_uses[i])
4388 fprintf (file, "%s%d", pref, i);
4389 pref = ", ";
4391 fprintf (file, "\n");
4394 /* Try changing the candidate in IVS to CAND for each use. Return the cost
4395 of the new set, and store the differences in DELTA. The number of
4396 induction variables in the new set is stored to N_IVS. */
4398 static unsigned
4399 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4400 struct iv_cand *cand, struct iv_ca_delta **delta,
4401 unsigned *n_ivs)
4403 unsigned i, cost;
4404 struct iv_use *use;
4405 struct cost_pair *old_cp, *new_cp;
4407 *delta = NULL;
4408 for (i = 0; i < ivs->upto; i++)
4410 use = iv_use (data, i);
4411 old_cp = iv_ca_cand_for_use (ivs, use);
4413 if (old_cp
4414 && old_cp->cand == cand)
4415 continue;
4417 new_cp = get_use_iv_cost (data, use, cand);
4418 if (!new_cp)
4419 continue;
4421 if (!iv_ca_has_deps (ivs, new_cp))
4422 continue;
4424 if (!cheaper_cost_pair (new_cp, old_cp))
4425 continue;
4427 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
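  /* Commit the changes temporarily to compute the cost of the new
     assignment, then revert them so that IVS is left unchanged.  */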
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try narrowing the set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  */

static unsigned
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  unsigned cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
	continue;

      new_cp = NULL;
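      /* Search the remaining candidates of IVS for the cheapest one that
         can express this use instead of CAND.  */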
      if (data->consider_all_candidates)
	{
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}
      else
	{
	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}

      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return INFTY;
	}

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try optimizing the set of candidates IVS by removing candidates other
   than EXCEPT_CAND from it.  Return the cost of the new set, and store
   the differences in DELTA.  */

static unsigned
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i, best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);
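  /* Try removing each candidate of the set in turn, and remember the
     removal that improves the cost the most.  */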
  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
/* Tries to extend the set IVS in the best possible way in order
   to express the USE.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_use *use)
{
  unsigned best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }
  /* First try important candidates.  Only if that fails, try the candidates
     specific to this use.  Rationale -- in loops with many variables the
     best choice often is to use just one generic biv.  If we added here
     many ivs specific to the uses, the optimization algorithm later would
     be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to
     be successful -- starting from few ivs, replacing an expensive use by
     a specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
	continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (act_cost < best_cost)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }
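  /* If no important candidate could express the use, fall back to the
     candidates specific to this use.  */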
  if (best_cost == INFTY)
    {
      for (i = 0; i < use->n_map_members; i++)
	{
	  cp = use->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important)
	    continue;

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, use, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
	  iv_ca_set_no_cp (data, ivs, use);
	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
				       cp, act_delta);

	  if (act_cost < best_cost)
	    {
	      best_cost = act_cost;

	      if (best_delta)
		iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return (best_cost != INFTY);
}
/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i)))
      {
	iv_ca_free (&ivs);
	return NULL;
      }

  return ivs;
}
/* Tries to improve the set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      if (!act_delta)
	continue;
      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
	return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (best_cost == iv_ca_cost (ivs));
  iv_ca_delta_free (&best_delta);
  return true;
}
/* Attempts to find the optimal set of induction variables.  We use a simple
   greedy heuristic -- we try to replace at most one candidate in the
   selected solution and remove the unused ivs while this improves the
   cost.  */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set;
  struct iv_use *use;

  /* Get the initial solution.  */
  set = get_initial_solution (data);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
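  /* Record the selected candidate for each use, for the rewriting phase.  */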
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  block_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = bsi_last (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = bsi_last (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;

      return;
    }
  gimple_add_tmp_var (cand->var_before);
  add_referenced_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}
/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }
}
/* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
   is true, also remove the ssa name defined by the statement.  */

static void
remove_statement (tree stmt, bool including_defined_name)
{
  if (TREE_CODE (stmt) == PHI_NODE)
    {
      remove_phi_node (stmt, NULL_TREE, including_defined_name);
    }
  else
    {
      block_stmt_iterator bsi = bsi_for_stmt (stmt);

      bsi_remove (&bsi, true);
      release_defs (stmt);
    }
}
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, stmts, tgt, ass;
  block_stmt_iterator bsi, pbsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR;

      gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
      gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      if (TREE_CODE (step) == NEGATE_EXPR)
	{
	  incr_code = MINUS_EXPR;
	  step = TREE_OPERAND (step, 0);
	}

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to ICE.  */
      op = GIMPLE_STMT_OPERAND (use->stmt, 1);
      if (TREE_CODE (op) == PLUS_EXPR
	  || TREE_CODE (op) == MINUS_EXPR)
	{
	  if (TREE_OPERAND (op, 0) == cand->var_before)
	    op = TREE_OPERAND (op, 1);
	  else if (TREE_CODE (op) == PLUS_EXPR
		   && TREE_OPERAND (op, 1) == cand->var_before)
	    op = TREE_OPERAND (op, 0);
	  else
	    op = NULL_TREE;
	}
      else
	op = NULL_TREE;

      if (op
	  && (TREE_CODE (op) == INTEGER_CST
	      || operand_equal_p (op, step, 0)))
	return;

      /* Otherwise, add the necessary computations to express
	 the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
			   build2 (incr_code, ctype, op,
				   unshare_expr (step)));
    }
  else
    {
      comp = get_computation (data->current_loop, use, cand);
      gcc_assert (comp != NULL_TREE);
    }
  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;
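      /* The computation cannot be inserted at the phi node itself; place
	 it at the start of the basic block, after any leading labels.  */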
      pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
      while (!bsi_end_p (pbsi)
	     && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
	{
	  bsi = pbsi;
	  bsi_next (&pbsi);
	}
      break;

    case GIMPLE_MODIFY_STMT:
      tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
      bsi = bsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }
  op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));

  if (TREE_CODE (use->stmt) == PHI_NODE)
    {
      if (stmts)
	bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
      ass = build2_gimple (GIMPLE_MODIFY_STMT, tgt, op);
      bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
      remove_statement (use->stmt, false);
      SSA_NAME_DEF_STMT (tgt) = ass;
    }
  else
    {
      if (stmts)
	bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
      GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
    }
}
/* Replaces ssa name in index IDX by its basic variable.  Callback for
   for_each_index.  */

static bool
idx_remove_ssa_names (tree base, tree *idx,
		      void *data ATTRIBUTE_UNUSED)
{
  tree *op;

  if (TREE_CODE (*idx) == SSA_NAME)
    *idx = SSA_NAME_VAR (*idx);

  if (TREE_CODE (base) == ARRAY_REF)
    {
      op = &TREE_OPERAND (base, 2);
      if (*op
	  && TREE_CODE (*op) == SSA_NAME)
	*op = SSA_NAME_VAR (*op);
      op = &TREE_OPERAND (base, 3);
      if (*op
	  && TREE_CODE (*op) == SSA_NAME)
	*op = SSA_NAME_VAR (*op);
    }

  return true;
}
/* Unshares REF and replaces ssa names inside it by their basic variables.  */

static tree
unshare_and_remove_ssa_names (tree ref)
{
  ref = unshare_expr (ref);
  for_each_index (&ref, idx_remove_ssa_names, NULL);

  return ref;
}
/* Extract the alias analysis info for the memory reference REF.  There are
   several ways this information may be stored, and what precisely its
   semantics are depends on the type of the reference, but there is always
   one _DECL node hidden somewhere that is used to determine the set of
   virtual operands for the reference.  The code below deciphers this
   jungle and extracts this single useful piece of information.  */

static tree
get_ref_tag (tree ref, tree orig)
{
  tree var = get_base_address (ref);
  tree aref = NULL_TREE, tag, sv;
  HOST_WIDE_INT offset, size, maxsize;

  for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
    {
      aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
      if (ref)
	break;
    }

  if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
    return unshare_expr (sv);

  if (!var)
    return NULL_TREE;
  if (TREE_CODE (var) == INDIRECT_REF)
    {
      /* If the base is a dereference of a pointer, first check its name
	 memory tag.  If it does not have one, use its symbol memory
	 tag.  */
      var = TREE_OPERAND (var, 0);
      if (TREE_CODE (var) != SSA_NAME)
	return NULL_TREE;

      if (SSA_NAME_PTR_INFO (var))
	{
	  tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
	  if (tag)
	    return tag;
	}

      var = SSA_NAME_VAR (var);
      tag = symbol_mem_tag (var);
      gcc_assert (tag != NULL_TREE);
      return tag;
    }
  else
    {
      if (!DECL_P (var))
	return NULL_TREE;

      tag = symbol_mem_tag (var);
      if (tag)
	return tag;

      return var;
    }
}
/* Copies the reference information from OLD_REF to NEW_REF.  */

static void
copy_ref_info (tree new_ref, tree old_ref)
{
  if (TREE_CODE (old_ref) == TARGET_MEM_REF)
    copy_mem_ref_info (new_ref, old_ref);
  else
    {
      TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
      TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
    }
}
/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  tree ref;
  bool ok;

  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);
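  /* Build a memory reference for the address described by the affine
     combination AFF, and substitute it for the original operand.  */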
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
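  /* A nonzero bound means that iv elimination succeeded; the exit test is
     rewritten as a comparison of the candidate against the bound.  */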
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);

      compare = iv_elimination_compare (data, use);
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand_bsi (&bsi, bound, true, NULL_TREE);

      *use->op_p = build2 (compare, boolean_type_node, var, op);
      return;
    }
  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->op_p, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_bsi (&bsi, comp, true, SSA_NAME_VAR (*var_p));
}
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  push_stmt_changes (&use->stmt);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  pop_stmt_changes (&use->stmt);
}
/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}
/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
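      /* Remove only real ivs (with a nonzero step) for which no use was
	 found, that are not referenced as invariants and that we do not
	 need to preserve.  */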
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
    }
}
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  htab_empty (data->niters);

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
	free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
	free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);
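  /* More ssa names may have been created while optimizing this loop;
     reallocate version_info so that it remains large enough.  */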
  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
				     data->version_info_size);
    }

  data->max_inv_id = 0;

  for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);
}
/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);
  htab_delete (data->niters);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;

  data->current_loop = loop;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  /* For each ssa name, determine whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Find the interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Find the candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculate the costs (item 3, part 1).  */
  determine_use_iv_costs (data);
  determine_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}