/* gcc/tree-ssa-loop-ivopts.c — extracted from official-gcc.git,
   blob 411cad22544cd2bac39d936c4c5f5d936995be21.  */
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301, USA. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
91 #include "langhooks.h"
92 #include "tree-affine.h"
/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5
102 /* Representation of the induction variable. */
103 struct iv
105 tree base; /* Initial value of the iv. */
106 tree base_object; /* A memory object to that the induction variable points. */
107 tree step; /* Step of the iv (constant only). */
108 tree ssa_name; /* The ssa name with the value. */
109 bool biv_p; /* Is it a biv? */
110 bool have_use_for; /* Do we already have a use for it? */
111 unsigned use_id; /* The identifier in the use if it is the case. */
114 /* Per-ssa version information (induction variable descriptions, etc.). */
115 struct version_info
117 tree name; /* The ssa name. */
118 struct iv *iv; /* Induction variable description. */
119 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
120 an expression that is not an induction variable. */
121 unsigned inv_id; /* Id of an invariant. */
122 bool preserve_biv; /* For the original biv, whether to preserve it. */
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
133 /* The candidate - cost pair. */
134 struct cost_pair
136 struct iv_cand *cand; /* The candidate. */
137 unsigned cost; /* The cost. */
138 bitmap depends_on; /* The list of invariants that have to be
139 preserved. */
140 tree value; /* For final value elimination, the expression for
141 the final value of the iv. For iv elimination,
142 the new bound to compare with. */
145 /* Use. */
146 struct iv_use
148 unsigned id; /* The id of the use. */
149 enum use_type type; /* Type of the use. */
150 struct iv *iv; /* The induction variable it is based on. */
151 tree stmt; /* Statement in that it occurs. */
152 tree *op_p; /* The place where it occurs. */
153 bitmap related_cands; /* The set of "related" iv candidates, plus the common
154 important ones. */
156 unsigned n_map_members; /* Number of candidates in the cost_map list. */
157 struct cost_pair *cost_map;
158 /* The costs wrto the iv candidates. */
160 struct iv_cand *selected;
161 /* The selected candidate. */
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_ORIGINAL		/* The original biv.  */
};
172 /* The induction variable candidate. */
173 struct iv_cand
175 unsigned id; /* The number of the candidate. */
176 bool important; /* Whether this is an "important" candidate, i.e. such
177 that it should be considered by all uses. */
178 enum iv_position pos; /* Where it is computed. */
179 tree incremented_at; /* For original biv, the statement where it is
180 incremented. */
181 tree var_before; /* The variable used for it before increment. */
182 tree var_after; /* The variable used for it after increment. */
183 struct iv *iv; /* The value of the candidate. NULL for
184 "pseudocandidate" used to indicate the possibility
185 to replace the final value of an iv by direct
186 computation of the value. */
187 unsigned cost; /* Cost of the candidate. */
188 bitmap depends_on; /* The list of invariants that are used in step of the
189 biv. */
192 /* The data used by the induction variable optimizations. */
194 typedef struct iv_use *iv_use_p;
195 DEF_VEC_P(iv_use_p);
196 DEF_VEC_ALLOC_P(iv_use_p,heap);
198 typedef struct iv_cand *iv_cand_p;
199 DEF_VEC_P(iv_cand_p);
200 DEF_VEC_ALLOC_P(iv_cand_p,heap);
202 struct ivopts_data
204 /* The currently optimized loop. */
205 struct loop *current_loop;
207 /* Number of registers used in it. */
208 unsigned regs_used;
210 /* Numbers of iterations for all exits of the current loop. */
211 htab_t niters;
213 /* The size of version_info array allocated. */
214 unsigned version_info_size;
216 /* The array of information for the ssa names. */
217 struct version_info *version_info;
219 /* The bitmap of indices in version_info whose value was changed. */
220 bitmap relevant;
222 /* The maximum invariant id. */
223 unsigned max_inv_id;
225 /* The uses of induction variables. */
226 VEC(iv_use_p,heap) *iv_uses;
228 /* The candidates. */
229 VEC(iv_cand_p,heap) *iv_candidates;
231 /* A bitmap of important candidates. */
232 bitmap important_candidates;
234 /* Whether to consider just related and important candidates when replacing a
235 use. */
236 bool consider_all_candidates;
239 /* An assignment of iv candidates to uses. */
241 struct iv_ca
243 /* The number of uses covered by the assignment. */
244 unsigned upto;
246 /* Number of uses that cannot be expressed by the candidates in the set. */
247 unsigned bad_uses;
249 /* Candidate assigned to a use, together with the related costs. */
250 struct cost_pair **cand_for_use;
252 /* Number of times each candidate is used. */
253 unsigned *n_cand_uses;
255 /* The candidates used. */
256 bitmap cands;
258 /* The number of candidates in the set. */
259 unsigned n_cands;
261 /* Total number of registers needed. */
262 unsigned n_regs;
264 /* Total cost of expressing uses. */
265 unsigned cand_use_cost;
267 /* Total cost of candidates. */
268 unsigned cand_cost;
270 /* Number of times each invariant is used. */
271 unsigned *n_invariant_uses;
273 /* Total cost of the assignment. */
274 unsigned cost;
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below that all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
311 /* The list of trees for that the decl_rtl field must be reset is stored
312 here. */
314 static VEC(tree,heap) *decl_rtl_to_reset;
316 /* Number of uses recorded in DATA. */
318 static inline unsigned
319 n_iv_uses (struct ivopts_data *data)
321 return VEC_length (iv_use_p, data->iv_uses);
324 /* Ith use recorded in DATA. */
326 static inline struct iv_use *
327 iv_use (struct ivopts_data *data, unsigned i)
329 return VEC_index (iv_use_p, data->iv_uses, i);
332 /* Number of candidates recorded in DATA. */
334 static inline unsigned
335 n_iv_cands (struct ivopts_data *data)
337 return VEC_length (iv_cand_p, data->iv_candidates);
340 /* Ith candidate recorded in DATA. */
342 static inline struct iv_cand *
343 iv_cand (struct ivopts_data *data, unsigned i)
345 return VEC_index (iv_cand_p, data->iv_candidates, i);
348 /* The single loop exit if it dominates the latch, NULL otherwise. */
350 edge
351 single_dom_exit (struct loop *loop)
353 edge exit = single_exit (loop);
355 if (!exit)
356 return NULL;
358 if (!just_once_each_iteration_p (loop, exit->src))
359 return NULL;
361 return exit;
364 /* Dumps information about the induction variable IV to FILE. */
366 extern void dump_iv (FILE *, struct iv *);
367 void
368 dump_iv (FILE *file, struct iv *iv)
370 if (iv->ssa_name)
372 fprintf (file, "ssa name ");
373 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
374 fprintf (file, "\n");
377 fprintf (file, " type ");
378 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
379 fprintf (file, "\n");
381 if (iv->step)
383 fprintf (file, " base ");
384 print_generic_expr (file, iv->base, TDF_SLIM);
385 fprintf (file, "\n");
387 fprintf (file, " step ");
388 print_generic_expr (file, iv->step, TDF_SLIM);
389 fprintf (file, "\n");
391 else
393 fprintf (file, " invariant ");
394 print_generic_expr (file, iv->base, TDF_SLIM);
395 fprintf (file, "\n");
398 if (iv->base_object)
400 fprintf (file, " base object ");
401 print_generic_expr (file, iv->base_object, TDF_SLIM);
402 fprintf (file, "\n");
405 if (iv->biv_p)
406 fprintf (file, " is a biv\n");
409 /* Dumps information about the USE to FILE. */
411 extern void dump_use (FILE *, struct iv_use *);
412 void
413 dump_use (FILE *file, struct iv_use *use)
415 fprintf (file, "use %d\n", use->id);
417 switch (use->type)
419 case USE_NONLINEAR_EXPR:
420 fprintf (file, " generic\n");
421 break;
423 case USE_ADDRESS:
424 fprintf (file, " address\n");
425 break;
427 case USE_COMPARE:
428 fprintf (file, " compare\n");
429 break;
431 default:
432 gcc_unreachable ();
435 fprintf (file, " in statement ");
436 print_generic_expr (file, use->stmt, TDF_SLIM);
437 fprintf (file, "\n");
439 fprintf (file, " at position ");
440 if (use->op_p)
441 print_generic_expr (file, *use->op_p, TDF_SLIM);
442 fprintf (file, "\n");
444 dump_iv (file, use->iv);
446 if (use->related_cands)
448 fprintf (file, " related candidates ");
449 dump_bitmap (file, use->related_cands);
453 /* Dumps information about the uses to FILE. */
455 extern void dump_uses (FILE *, struct ivopts_data *);
456 void
457 dump_uses (FILE *file, struct ivopts_data *data)
459 unsigned i;
460 struct iv_use *use;
462 for (i = 0; i < n_iv_uses (data); i++)
464 use = iv_use (data, i);
466 dump_use (file, use);
467 fprintf (file, "\n");
471 /* Dumps information about induction variable candidate CAND to FILE. */
473 extern void dump_cand (FILE *, struct iv_cand *);
474 void
475 dump_cand (FILE *file, struct iv_cand *cand)
477 struct iv *iv = cand->iv;
479 fprintf (file, "candidate %d%s\n",
480 cand->id, cand->important ? " (important)" : "");
482 if (cand->depends_on)
484 fprintf (file, " depends on ");
485 dump_bitmap (file, cand->depends_on);
488 if (!iv)
490 fprintf (file, " final value replacement\n");
491 return;
494 switch (cand->pos)
496 case IP_NORMAL:
497 fprintf (file, " incremented before exit test\n");
498 break;
500 case IP_END:
501 fprintf (file, " incremented at end\n");
502 break;
504 case IP_ORIGINAL:
505 fprintf (file, " original biv\n");
506 break;
509 dump_iv (file, iv);
512 /* Returns the info for ssa version VER. */
514 static inline struct version_info *
515 ver_info (struct ivopts_data *data, unsigned ver)
517 return data->version_info + ver;
520 /* Returns the info for ssa name NAME. */
522 static inline struct version_info *
523 name_info (struct ivopts_data *data, tree name)
525 return ver_info (data, SSA_NAME_VERSION (name));
528 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
529 emitted in LOOP. */
531 static bool
532 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
534 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
536 gcc_assert (bb);
538 if (sbb == loop->latch)
539 return true;
541 if (sbb != bb)
542 return false;
544 return stmt == last_stmt (bb);
547 /* Returns true if STMT if after the place where the original induction
548 variable CAND is incremented. */
550 static bool
551 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
553 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
554 basic_block stmt_bb = bb_for_stmt (stmt);
555 block_stmt_iterator bsi;
557 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
558 return false;
560 if (stmt_bb != cand_bb)
561 return true;
563 /* Scan the block from the end, since the original ivs are usually
564 incremented at the end of the loop body. */
565 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
567 if (bsi_stmt (bsi) == cand->incremented_at)
568 return false;
569 if (bsi_stmt (bsi) == stmt)
570 return true;
574 /* Returns true if STMT if after the place where the induction variable
575 CAND is incremented in LOOP. */
577 static bool
578 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
580 switch (cand->pos)
582 case IP_END:
583 return false;
585 case IP_NORMAL:
586 return stmt_after_ip_normal_pos (loop, stmt);
588 case IP_ORIGINAL:
589 return stmt_after_ip_original_pos (cand, stmt);
591 default:
592 gcc_unreachable ();
596 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
598 static bool
599 abnormal_ssa_name_p (tree exp)
601 if (!exp)
602 return false;
604 if (TREE_CODE (exp) != SSA_NAME)
605 return false;
607 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
610 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
611 abnormal phi node. Callback for for_each_index. */
613 static bool
614 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
615 void *data ATTRIBUTE_UNUSED)
617 if (TREE_CODE (base) == ARRAY_REF)
619 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
620 return false;
621 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
622 return false;
625 return !abnormal_ssa_name_p (*index);
628 /* Returns true if EXPR contains a ssa name that occurs in an
629 abnormal phi node. */
631 bool
632 contains_abnormal_ssa_name_p (tree expr)
634 enum tree_code code;
635 enum tree_code_class class;
637 if (!expr)
638 return false;
640 code = TREE_CODE (expr);
641 class = TREE_CODE_CLASS (code);
643 if (code == SSA_NAME)
644 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
646 if (code == INTEGER_CST
647 || is_gimple_min_invariant (expr))
648 return false;
650 if (code == ADDR_EXPR)
651 return !for_each_index (&TREE_OPERAND (expr, 0),
652 idx_contains_abnormal_ssa_name_p,
653 NULL);
655 switch (class)
657 case tcc_binary:
658 case tcc_comparison:
659 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
660 return true;
662 /* Fallthru. */
663 case tcc_unary:
664 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
665 return true;
667 break;
669 default:
670 gcc_unreachable ();
673 return false;
676 /* Element of the table in that we cache the numbers of iterations obtained
677 from exits of the loop. */
679 struct nfe_cache_elt
681 /* The edge for that the number of iterations is cached. */
682 edge exit;
684 /* Number of iterations corresponding to this exit, or NULL if it cannot be
685 determined. */
686 tree niter;
689 /* Hash function for nfe_cache_elt E. */
691 static hashval_t
692 nfe_hash (const void *e)
694 const struct nfe_cache_elt *elt = e;
696 return htab_hash_pointer (elt->exit);
699 /* Equality function for nfe_cache_elt E1 and edge E2. */
701 static int
702 nfe_eq (const void *e1, const void *e2)
704 const struct nfe_cache_elt *elt1 = e1;
706 return elt1->exit == e2;
709 /* Returns tree describing number of iterations determined from
710 EXIT of DATA->current_loop, or NULL if something goes wrong. */
712 static tree
713 niter_for_exit (struct ivopts_data *data, edge exit)
715 struct nfe_cache_elt *nfe_desc;
716 struct tree_niter_desc desc;
717 PTR *slot;
719 slot = htab_find_slot_with_hash (data->niters, exit,
720 htab_hash_pointer (exit),
721 INSERT);
723 if (!*slot)
725 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
726 nfe_desc->exit = exit;
728 /* Try to determine number of iterations. We must know it
729 unconditionally (i.e., without possibility of # of iterations
730 being zero). Also, we cannot safely work with ssa names that
731 appear in phi nodes on abnormal edges, so that we do not create
732 overlapping life ranges for them (PR 27283). */
733 if (number_of_iterations_exit (data->current_loop,
734 exit, &desc, true)
735 && integer_zerop (desc.may_be_zero)
736 && !contains_abnormal_ssa_name_p (desc.niter))
737 nfe_desc->niter = desc.niter;
738 else
739 nfe_desc->niter = NULL_TREE;
741 else
742 nfe_desc = *slot;
744 return nfe_desc->niter;
747 /* Returns tree describing number of iterations determined from
748 single dominating exit of DATA->current_loop, or NULL if something
749 goes wrong. */
751 static tree
752 niter_for_single_dom_exit (struct ivopts_data *data)
754 edge exit = single_dom_exit (data->current_loop);
756 if (!exit)
757 return NULL;
759 return niter_for_exit (data, exit);
762 /* Initializes data structures used by the iv optimization pass, stored
763 in DATA. */
765 static void
766 tree_ssa_iv_optimize_init (struct ivopts_data *data)
768 data->version_info_size = 2 * num_ssa_names;
769 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
770 data->relevant = BITMAP_ALLOC (NULL);
771 data->important_candidates = BITMAP_ALLOC (NULL);
772 data->max_inv_id = 0;
773 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
774 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
775 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
776 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
779 /* Returns a memory object to that EXPR points. In case we are able to
780 determine that it does not point to any such object, NULL is returned. */
782 static tree
783 determine_base_object (tree expr)
785 enum tree_code code = TREE_CODE (expr);
786 tree base, obj, op0, op1;
788 /* If this is a pointer casted to any type, we need to determine
789 the base object for the pointer; so handle conversions before
790 throwing away non-pointer expressions. */
791 if (TREE_CODE (expr) == NOP_EXPR
792 || TREE_CODE (expr) == CONVERT_EXPR)
793 return determine_base_object (TREE_OPERAND (expr, 0));
795 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
796 return NULL_TREE;
798 switch (code)
800 case INTEGER_CST:
801 return NULL_TREE;
803 case ADDR_EXPR:
804 obj = TREE_OPERAND (expr, 0);
805 base = get_base_address (obj);
807 if (!base)
808 return expr;
810 if (TREE_CODE (base) == INDIRECT_REF)
811 return determine_base_object (TREE_OPERAND (base, 0));
813 return fold_convert (ptr_type_node,
814 build_fold_addr_expr (base));
816 case PLUS_EXPR:
817 case MINUS_EXPR:
818 op0 = determine_base_object (TREE_OPERAND (expr, 0));
819 op1 = determine_base_object (TREE_OPERAND (expr, 1));
821 if (!op1)
822 return op0;
824 if (!op0)
825 return (code == PLUS_EXPR
826 ? op1
827 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
829 return fold_build2 (code, ptr_type_node, op0, op1);
831 default:
832 return fold_convert (ptr_type_node, expr);
836 /* Allocates an induction variable with given initial value BASE and step STEP
837 for loop LOOP. */
839 static struct iv *
840 alloc_iv (tree base, tree step)
842 struct iv *iv = XCNEW (struct iv);
843 gcc_assert (step != NULL_TREE);
845 iv->base = base;
846 iv->base_object = determine_base_object (base);
847 iv->step = step;
848 iv->biv_p = false;
849 iv->have_use_for = false;
850 iv->use_id = 0;
851 iv->ssa_name = NULL_TREE;
853 return iv;
856 /* Sets STEP and BASE for induction variable IV. */
858 static void
859 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
861 struct version_info *info = name_info (data, iv);
863 gcc_assert (!info->iv);
865 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
866 info->iv = alloc_iv (base, step);
867 info->iv->ssa_name = iv;
870 /* Finds induction variable declaration for VAR. */
872 static struct iv *
873 get_iv (struct ivopts_data *data, tree var)
875 basic_block bb;
876 tree type = TREE_TYPE (var);
878 if (!POINTER_TYPE_P (type)
879 && !INTEGRAL_TYPE_P (type))
880 return NULL;
882 if (!name_info (data, var)->iv)
884 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
886 if (!bb
887 || !flow_bb_inside_loop_p (data->current_loop, bb))
888 set_iv (data, var, var, build_int_cst (type, 0));
891 return name_info (data, var)->iv;
894 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
895 not define a simple affine biv with nonzero step. */
897 static tree
898 determine_biv_step (tree phi)
900 struct loop *loop = bb_for_stmt (phi)->loop_father;
901 tree name = PHI_RESULT (phi);
902 affine_iv iv;
904 if (!is_gimple_reg (name))
905 return NULL_TREE;
907 if (!simple_iv (loop, phi, name, &iv, true))
908 return NULL_TREE;
910 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
913 /* Finds basic ivs. */
915 static bool
916 find_bivs (struct ivopts_data *data)
918 tree phi, step, type, base;
919 bool found = false;
920 struct loop *loop = data->current_loop;
922 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
924 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
925 continue;
927 step = determine_biv_step (phi);
928 if (!step)
929 continue;
931 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
932 base = expand_simple_operations (base);
933 if (contains_abnormal_ssa_name_p (base)
934 || contains_abnormal_ssa_name_p (step))
935 continue;
937 type = TREE_TYPE (PHI_RESULT (phi));
938 base = fold_convert (type, base);
939 if (step)
940 step = fold_convert (type, step);
942 set_iv (data, PHI_RESULT (phi), base, step);
943 found = true;
946 return found;
949 /* Marks basic ivs. */
951 static void
952 mark_bivs (struct ivopts_data *data)
954 tree phi, var;
955 struct iv *iv, *incr_iv;
956 struct loop *loop = data->current_loop;
957 basic_block incr_bb;
959 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
961 iv = get_iv (data, PHI_RESULT (phi));
962 if (!iv)
963 continue;
965 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
966 incr_iv = get_iv (data, var);
967 if (!incr_iv)
968 continue;
970 /* If the increment is in the subloop, ignore it. */
971 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
972 if (incr_bb->loop_father != data->current_loop
973 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
974 continue;
976 iv->biv_p = true;
977 incr_iv->biv_p = true;
981 /* Checks whether STMT defines a linear induction variable and stores its
982 parameters to IV. */
984 static bool
985 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
987 tree lhs;
988 struct loop *loop = data->current_loop;
990 iv->base = NULL_TREE;
991 iv->step = NULL_TREE;
993 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
994 return false;
996 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
997 if (TREE_CODE (lhs) != SSA_NAME)
998 return false;
1000 if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
1001 return false;
1002 iv->base = expand_simple_operations (iv->base);
1004 if (contains_abnormal_ssa_name_p (iv->base)
1005 || contains_abnormal_ssa_name_p (iv->step))
1006 return false;
1008 return true;
1011 /* Finds general ivs in statement STMT. */
1013 static void
1014 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1016 affine_iv iv;
1018 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1019 return;
1021 set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
1024 /* Finds general ivs in basic block BB. */
1026 static void
1027 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1029 block_stmt_iterator bsi;
1031 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1032 find_givs_in_stmt (data, bsi_stmt (bsi));
1035 /* Finds general ivs. */
1037 static void
1038 find_givs (struct ivopts_data *data)
1040 struct loop *loop = data->current_loop;
1041 basic_block *body = get_loop_body_in_dom_order (loop);
1042 unsigned i;
1044 for (i = 0; i < loop->num_nodes; i++)
1045 find_givs_in_bb (data, body[i]);
1046 free (body);
1049 /* For each ssa name defined in LOOP determines whether it is an induction
1050 variable and if so, its initial value and step. */
1052 static bool
1053 find_induction_variables (struct ivopts_data *data)
1055 unsigned i;
1056 bitmap_iterator bi;
1058 if (!find_bivs (data))
1059 return false;
1061 find_givs (data);
1062 mark_bivs (data);
1064 if (dump_file && (dump_flags & TDF_DETAILS))
1066 tree niter = niter_for_single_dom_exit (data);
1068 if (niter)
1070 fprintf (dump_file, " number of iterations ");
1071 print_generic_expr (dump_file, niter, TDF_SLIM);
1072 fprintf (dump_file, "\n\n");
1075 fprintf (dump_file, "Induction variables:\n\n");
1077 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1079 if (ver_info (data, i)->iv)
1080 dump_iv (dump_file, ver_info (data, i)->iv);
1084 return true;
1087 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1089 static struct iv_use *
1090 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1091 tree stmt, enum use_type use_type)
1093 struct iv_use *use = XCNEW (struct iv_use);
1095 use->id = n_iv_uses (data);
1096 use->type = use_type;
1097 use->iv = iv;
1098 use->stmt = stmt;
1099 use->op_p = use_p;
1100 use->related_cands = BITMAP_ALLOC (NULL);
1102 /* To avoid showing ssa name in the dumps, if it was not reset by the
1103 caller. */
1104 iv->ssa_name = NULL_TREE;
1106 if (dump_file && (dump_flags & TDF_DETAILS))
1107 dump_use (dump_file, use);
1109 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1111 return use;
1114 /* Checks whether OP is a loop-level invariant and if so, records it.
1115 NONLINEAR_USE is true if the invariant is used in a way we do not
1116 handle specially. */
1118 static void
1119 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1121 basic_block bb;
1122 struct version_info *info;
1124 if (TREE_CODE (op) != SSA_NAME
1125 || !is_gimple_reg (op))
1126 return;
1128 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1129 if (bb
1130 && flow_bb_inside_loop_p (data->current_loop, bb))
1131 return;
1133 info = name_info (data, op);
1134 info->name = op;
1135 info->has_nonlin_use |= nonlinear_use;
1136 if (!info->inv_id)
1137 info->inv_id = ++data->max_inv_id;
1138 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1141 /* Checks whether the use OP is interesting and if so, records it. */
1143 static struct iv_use *
1144 find_interesting_uses_op (struct ivopts_data *data, tree op)
1146 struct iv *iv;
1147 struct iv *civ;
1148 tree stmt;
1149 struct iv_use *use;
1151 if (TREE_CODE (op) != SSA_NAME)
1152 return NULL;
1154 iv = get_iv (data, op);
1155 if (!iv)
1156 return NULL;
1158 if (iv->have_use_for)
1160 use = iv_use (data, iv->use_id);
1162 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1163 return use;
1166 if (integer_zerop (iv->step))
1168 record_invariant (data, op, true);
1169 return NULL;
1171 iv->have_use_for = true;
1173 civ = XNEW (struct iv);
1174 *civ = *iv;
1176 stmt = SSA_NAME_DEF_STMT (op);
1177 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1178 || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1180 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1181 iv->use_id = use->id;
1183 return use;
1186 /* Checks whether the condition *COND_P in STMT is interesting
1187 and if so, records it. */
1189 static void
1190 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1192 tree *op0_p;
1193 tree *op1_p;
1194 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1195 struct iv const_iv;
1196 tree zero = integer_zero_node;
1198 const_iv.step = integer_zero_node;
1200 if (TREE_CODE (*cond_p) != SSA_NAME
1201 && !COMPARISON_CLASS_P (*cond_p))
1202 return;
1204 if (TREE_CODE (*cond_p) == SSA_NAME)
1206 op0_p = cond_p;
1207 op1_p = &zero;
1209 else
1211 op0_p = &TREE_OPERAND (*cond_p, 0);
1212 op1_p = &TREE_OPERAND (*cond_p, 1);
1215 if (TREE_CODE (*op0_p) == SSA_NAME)
1216 iv0 = get_iv (data, *op0_p);
1217 else
1218 iv0 = &const_iv;
1220 if (TREE_CODE (*op1_p) == SSA_NAME)
1221 iv1 = get_iv (data, *op1_p);
1222 else
1223 iv1 = &const_iv;
1225 if (/* When comparing with non-invariant value, we may not do any senseful
1226 induction variable elimination. */
1227 (!iv0 || !iv1)
1228 /* Eliminating condition based on two ivs would be nontrivial.
1229 ??? TODO -- it is not really important to handle this case. */
1230 || (!integer_zerop (iv0->step)
1231 && !integer_zerop (iv1->step)))
1233 find_interesting_uses_op (data, *op0_p);
1234 find_interesting_uses_op (data, *op1_p);
1235 return;
1238 if (integer_zerop (iv0->step)
1239 && integer_zerop (iv1->step))
1241 /* If both are invariants, this is a work for unswitching. */
1242 return;
1245 civ = XNEW (struct iv);
1246 *civ = integer_zerop (iv0->step) ? *iv1: *iv0;
1247 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1250 /* Returns true if expression EXPR is obviously invariant in LOOP,
1251 i.e. if all its operands are defined outside of the LOOP. */
1253 bool
1254 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1256 basic_block def_bb;
1257 unsigned i, len;
1259 if (is_gimple_min_invariant (expr))
1260 return true;
1262 if (TREE_CODE (expr) == SSA_NAME)
1264 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1265 if (def_bb
1266 && flow_bb_inside_loop_p (loop, def_bb))
1267 return false;
1269 return true;
1272 if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1273 return false;
1275 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1276 for (i = 0; i < len; i++)
1277 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1278 return false;
1280 return true;
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;	/* Per-loop ivopts state.  */
  tree stmt;				/* Statement containing the reference.  */
  tree step;				/* Accumulated step, in sizetype bytes.  */
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* Give up on the exotic indirect-ref flavors.  */
  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  /* A non-SSA index contributes no step; nothing more to do for it.  */
  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  /* Convert the iv to sizetype; failure means the conversion is not
     provably safe (the index might wrap).  */
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  /* Accumulate element_size * iv_step into the total step of the address.  */
  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
1371 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1372 object is passed to it in DATA. */
1374 static bool
1375 idx_record_use (tree base, tree *idx,
1376 void *data)
1378 find_interesting_uses_op (data, *idx);
1379 if (TREE_CODE (base) == ARRAY_REF)
1381 find_interesting_uses_op (data, array_ref_element_size (base));
1382 find_interesting_uses_op (data, array_ref_low_bound (base));
1384 return true;
1387 /* Returns true if memory reference REF may be unaligned. */
1389 static bool
1390 may_be_unaligned_p (tree ref)
1392 tree base;
1393 tree base_type;
1394 HOST_WIDE_INT bitsize;
1395 HOST_WIDE_INT bitpos;
1396 tree toffset;
1397 enum machine_mode mode;
1398 int unsignedp, volatilep;
1399 unsigned base_align;
1401 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1402 thus they are not misaligned. */
1403 if (TREE_CODE (ref) == TARGET_MEM_REF)
1404 return false;
1406 /* The test below is basically copy of what expr.c:normal_inner_ref
1407 does to check whether the object must be loaded by parts when
1408 STRICT_ALIGNMENT is true. */
1409 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1410 &unsignedp, &volatilep, true);
1411 base_type = TREE_TYPE (base);
1412 base_align = TYPE_ALIGN (base_type);
1414 if (mode != BLKmode
1415 && (base_align < GET_MODE_ALIGNMENT (mode)
1416 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1417 || bitpos % BITS_PER_UNIT != 0))
1418 return true;
1420 return false;
1423 /* Return true if EXPR may be non-addressable. */
1425 static bool
1426 may_be_nonaddressable_p (tree expr)
1428 switch (TREE_CODE (expr))
1430 case COMPONENT_REF:
1431 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1432 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1434 case ARRAY_REF:
1435 case ARRAY_RANGE_REF:
1436 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1438 case VIEW_CONVERT_EXPR:
1439 /* This kind of view-conversions may wrap non-addressable objects
1440 and make them look addressable. After some processing the
1441 non-addressability may be uncovered again, causing ADDR_EXPRs
1442 of inappropriate objects to be built. */
1443 return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1444 && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1446 default:
1447 break;
1450 return false;
/* Finds addresses in *OP_P inside STMT.  On success records *OP_P as a
   USE_ADDRESS use; on any failure falls back to recording the individual
   indices as plain uses.  */

static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = *op_p, step = build_int_cst (sizetype, 0);
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (stmt_ann (stmt)->has_volatile_ops)
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  if (may_be_nonaddressable_p (base))
    goto fail;

  if (STRICT_ALIGNMENT
      && may_be_unaligned_p (base))
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      /* Replace the TMR base and index by the initial values of their ivs,
	 accumulating the combined step.  */
      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      /* The index step is scaled by the TMR step, if any.  */
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = build_int_cst (sizetype, 0);
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == INDIRECT_REF)
	    *ref = fold_indirect_ref (*ref);
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
1554 /* Finds and records invariants used in STMT. */
1556 static void
1557 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1559 ssa_op_iter iter;
1560 use_operand_p use_p;
1561 tree op;
1563 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1565 op = USE_FROM_PTR (use_p);
1566 record_invariant (data, op, false);
/* Finds interesting uses of induction variables in the statement STMT.
   Dispatches on the statement kind: conditions become USE_COMPARE,
   memory references become USE_ADDRESS, and anything remaining is
   swept up as nonlinear uses of the SSA operands.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
{
  struct iv *iv;
  tree op, lhs, rhs;
  ssa_op_iter iter;
  use_operand_p use_p;

  find_invariants_stmt (data, stmt);

  if (TREE_CODE (stmt) == COND_EXPR)
    {
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
      return;
    }

  if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
    {
      lhs = GIMPLE_STMT_OPERAND (stmt, 0);
      rhs = GIMPLE_STMT_OPERAND (stmt, 1);

      if (TREE_CODE (lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
	{
	case tcc_comparison:
	  find_interesting_uses_cond (data, stmt,
				      &GIMPLE_STMT_OPERAND (stmt, 1));
	  return;

	case tcc_reference:
	  /* A load; the store side may also be a reference.  */
	  find_interesting_uses_address (data, stmt,
					 &GIMPLE_STMT_OPERAND (stmt, 1));
	  if (REFERENCE_CLASS_P (lhs))
	    find_interesting_uses_address (data, stmt,
					   &GIMPLE_STMT_OPERAND (stmt, 0));
	  return;

	default: ;
	}

      if (REFERENCE_CLASS_P (lhs)
	  && is_gimple_val (rhs))
	{
	  /* A store of a simple value.  */
	  find_interesting_uses_address (data, stmt,
					 &GIMPLE_STMT_OPERAND (stmt, 0));
	  find_interesting_uses_op (data, rhs);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (TREE_CODE (stmt) == PHI_NODE
      && bb_for_stmt (stmt) == data->current_loop->header)
    {
      /* Header PHIs defining bivs are not interesting uses themselves.  */
      lhs = PHI_RESULT (stmt);
      iv = get_iv (data, lhs);

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
1665 /* Finds interesting uses of induction variables outside of loops
1666 on loop exit edge EXIT. */
1668 static void
1669 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1671 tree phi, def;
1673 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1675 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1676 if (is_gimple_reg (def))
1677 find_interesting_uses_op (data, def);
/* Finds uses of the induction variables that are interesting.  Walks all
   blocks of the current loop, recording uses on outgoing exit edges, in
   PHI nodes, and in ordinary statements; finally dumps the discovered
   invariants when detailed dumping is enabled.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  block_stmt_iterator bsi;
  tree phi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      /* Edges leaving the loop give rise to uses outside of it.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
	find_interesting_uses_stmt (data, phi);
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	find_interesting_uses_stmt (data, bsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  Returns EXPR with the
   stripped offsets replaced by zero (or the original expression when nothing
   could be stripped).  Note that *OFFSET arithmetic is done in
   unsigned HOST_WIDE_INT and wraps modulo 2^HOST_BITS_PER_WIDE_INT.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      /* The whole constant becomes the offset.  */
      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      /* Rebuild the sum, dropping operands that became zero.  */
      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == PLUS_EXPR)
	    expr = op1;
	  else
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      /* Constant part of the index, scaled by the element size.  */
      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      /* Entering an address; top_compref becomes true for the operand.  */
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for that we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
1876 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1878 static tree
1879 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1881 return strip_offset_1 (expr, false, false, offset);
1884 /* Returns variant of TYPE that can be used as base for different uses.
1885 We return unsigned type with the same precision, which avoids problems
1886 with overflows. */
1888 static tree
1889 generic_type_for (tree type)
1891 if (POINTER_TYPE_P (type))
1892 return unsigned_type_for (type);
1894 if (TYPE_UNSIGNED (type))
1895 return type;
1897 return unsigned_type_for (type);
1900 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1901 the bitmap to that we should store it. */
1903 static struct ivopts_data *fd_ivopts_data;
1904 static tree
1905 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1907 bitmap *depends_on = data;
1908 struct version_info *info;
1910 if (TREE_CODE (*expr_p) != SSA_NAME)
1911 return NULL_TREE;
1912 info = name_info (fd_ivopts_data, *expr_p);
1914 if (!info->inv_id || info->has_nonlin_use)
1915 return NULL_TREE;
1917 if (!*depends_on)
1918 *depends_on = BITMAP_ALLOC (NULL);
1919 bitmap_set_bit (*depends_on, info->inv_id);
1921 return NULL_TREE;
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.
   Duplicate candidates (same position, increment statement, base and step)
   are reused rather than added twice.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, tree incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      /* Canonicalize base and step to the generic (unsigned) type to avoid
	 overflow issues.  */
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  /* Look for an existing equivalent candidate first.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at)
	continue;

      if (!cand->iv)
	{
	  /* An existing pseudocandidate matches only another one.  */
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0))
	break;
    }

  if (i == n_iv_cands (data))
    {
      /* No match found; create a fresh candidate with id I.  */
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      /* A non-constant step makes the candidate depend on the invariants
	 occurring in it.  */
      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
2024 /* Returns true if incrementing the induction variable at the end of the LOOP
2025 is allowed.
2027 The purpose is to avoid splitting latch edge with a biv increment, thus
2028 creating a jump, possibly confusing other optimization passes and leaving
2029 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2030 is not available (so we do not have a better alternative), or if the latch
2031 edge is already nonempty. */
2033 static bool
2034 allow_ip_end_pos_p (struct loop *loop)
2036 if (!ip_normal_pos (loop))
2037 return true;
2039 if (!empty_block_p (ip_end_pos (loop)))
2040 return true;
2042 return false;
2045 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2046 position to POS. If USE is not NULL, the candidate is set as related to
2047 it. The candidate computation is scheduled on all available positions. */
2049 static void
2050 add_candidate (struct ivopts_data *data,
2051 tree base, tree step, bool important, struct iv_use *use)
2053 if (ip_normal_pos (data->current_loop))
2054 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2055 if (ip_end_pos (data->current_loop)
2056 && allow_ip_end_pos_p (data->current_loop))
2057 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2060 /* Add a standard "0 + 1 * iteration" iv candidate for a
2061 type with SIZE bits. */
2063 static void
2064 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2065 unsigned int size)
2067 tree type = lang_hooks.types.type_for_size (size, true);
2068 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2069 true, NULL);
2072 /* Adds standard iv candidates. */
2074 static void
2075 add_standard_iv_candidates (struct ivopts_data *data)
2077 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2079 /* The same for a double-integer type if it is still fast enough. */
2080 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2081 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
/* Adds candidates bases on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  tree phi, def;
  struct iv_cand *cand;

  /* The variable itself.  */
  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  add_candidate (data,
		 build_int_cst (TREE_TYPE (iv->base), 0),
		 iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (TREE_CODE (phi) == PHI_NODE)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  The candidate is incremented where the biv is, i.e.
	 at the definition of the value flowing in over the latch edge.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
			      iv->base, iv->step, true, IP_ORIGINAL, NULL,
			      SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}
2114 /* Adds candidates based on the old induction variables. */
2116 static void
2117 add_old_ivs_candidates (struct ivopts_data *data)
2119 unsigned i;
2120 struct iv *iv;
2121 bitmap_iterator bi;
2123 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2125 iv = ver_info (data, i)->iv;
2126 if (iv && iv->biv_p && !integer_zerop (iv->step))
2127 add_old_iv_candidates (data, iv);
2131 /* Adds candidates based on the value of the induction variable IV and USE. */
2133 static void
2134 add_iv_value_candidates (struct ivopts_data *data,
2135 struct iv *iv, struct iv_use *use)
2137 unsigned HOST_WIDE_INT offset;
2138 tree base;
2140 add_candidate (data, iv->base, iv->step, false, use);
2142 /* The same, but with initial value zero. Make such variable important,
2143 since it is generic enough so that possibly many uses may be based
2144 on it. */
2145 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2146 iv->step, true, use);
2148 /* Third, try removing the constant offset. */
2149 base = strip_offset (iv->base, &offset);
2150 if (offset)
2151 add_candidate (data, base, iv->step, false, use);
2154 /* Adds candidates based on the uses. */
2156 static void
2157 add_derived_ivs_candidates (struct ivopts_data *data)
2159 unsigned i;
2161 for (i = 0; i < n_iv_uses (data); i++)
2163 struct iv_use *use = iv_use (data, i);
2165 if (!use)
2166 continue;
2168 switch (use->type)
2170 case USE_NONLINEAR_EXPR:
2171 case USE_COMPARE:
2172 case USE_ADDRESS:
2173 /* Just add the ivs based on the value of the iv used here. */
2174 add_iv_value_candidates (data, use->iv, use);
2175 break;
2177 default:
2178 gcc_unreachable ();
2183 /* Record important candidates and add them to related_cands bitmaps
2184 if needed. */
2186 static void
2187 record_important_candidates (struct ivopts_data *data)
2189 unsigned i;
2190 struct iv_use *use;
2192 for (i = 0; i < n_iv_cands (data); i++)
2194 struct iv_cand *cand = iv_cand (data, i);
2196 if (cand->important)
2197 bitmap_set_bit (data->important_candidates, i);
2200 data->consider_all_candidates = (n_iv_cands (data)
2201 <= CONSIDER_ALL_CANDIDATES_BOUND);
2203 if (data->consider_all_candidates)
2205 /* We will not need "related_cands" bitmaps in this case,
2206 so release them to decrease peak memory consumption. */
2207 for (i = 0; i < n_iv_uses (data); i++)
2209 use = iv_use (data, i);
2210 BITMAP_FREE (use->related_cands);
2213 else
2215 /* Add important candidates to the related_cands bitmaps. */
2216 for (i = 0; i < n_iv_uses (data); i++)
2217 bitmap_ior_into (iv_use (data, i)->related_cands,
2218 data->important_candidates);
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Commonly used ivs first, then the bivs already present in the loop,
     then candidates derived from the interesting uses, and finally the
     bookkeeping of which candidates are important.  */
  add_standard_iv_candidates (data);

  add_old_ivs_candidates (data);

  add_derived_ivs_candidates (data);

  record_important_candidates (data);
}
2240 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2241 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2242 we allocate a simple list to every use. */
2244 static void
2245 alloc_use_cost_map (struct ivopts_data *data)
2247 unsigned i, size, s, j;
2249 for (i = 0; i < n_iv_uses (data); i++)
2251 struct iv_use *use = iv_use (data, i);
2252 bitmap_iterator bi;
2254 if (data->consider_all_candidates)
2255 size = n_iv_cands (data);
2256 else
2258 s = 0;
2259 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2261 s++;
2264 /* Round up to the power of two, so that moduling by it is fast. */
2265 for (size = 1; size < s; size <<= 1)
2266 continue;
2269 use->n_map_members = size;
2270 use->cost_map = XCNEWVEC (struct cost_pair, size);
2274 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2275 on invariants DEPENDS_ON and that the value used in expressing it
2276 is VALUE.*/
2278 static void
2279 set_use_iv_cost (struct ivopts_data *data,
2280 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2281 bitmap depends_on, tree value)
2283 unsigned i, s;
2285 if (cost == INFTY)
2287 BITMAP_FREE (depends_on);
2288 return;
2291 if (data->consider_all_candidates)
2293 use->cost_map[cand->id].cand = cand;
2294 use->cost_map[cand->id].cost = cost;
2295 use->cost_map[cand->id].depends_on = depends_on;
2296 use->cost_map[cand->id].value = value;
2297 return;
2300 /* n_map_members is a power of two, so this computes modulo. */
2301 s = cand->id & (use->n_map_members - 1);
2302 for (i = s; i < use->n_map_members; i++)
2303 if (!use->cost_map[i].cand)
2304 goto found;
2305 for (i = 0; i < s; i++)
2306 if (!use->cost_map[i].cand)
2307 goto found;
2309 gcc_unreachable ();
2311 found:
2312 use->cost_map[i].cand = cand;
2313 use->cost_map[i].cost = cost;
2314 use->cost_map[i].depends_on = depends_on;
2315 use->cost_map[i].value = value;
2318 /* Gets cost of (USE, CANDIDATE) pair. */
2320 static struct cost_pair *
2321 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2322 struct iv_cand *cand)
2324 unsigned i, s;
2325 struct cost_pair *ret;
2327 if (!cand)
2328 return NULL;
2330 if (data->consider_all_candidates)
2332 ret = use->cost_map + cand->id;
2333 if (!ret->cand)
2334 return NULL;
2336 return ret;
2339 /* n_map_members is a power of two, so this computes modulo. */
2340 s = cand->id & (use->n_map_members - 1);
2341 for (i = s; i < use->n_map_members; i++)
2342 if (use->cost_map[i].cand == cand)
2343 return use->cost_map + i;
2345 for (i = 0; i < s; i++)
2346 if (use->cost_map[i].cand == cand)
2347 return use->cost_map + i;
2349 return NULL;
2352 /* Returns estimate on cost of computing SEQ. */
2354 static unsigned
2355 seq_cost (rtx seq)
2357 unsigned cost = 0;
2358 rtx set;
2360 for (; seq; seq = NEXT_INSN (seq))
2362 set = single_set (seq);
2363 if (set)
2364 cost += rtx_cost (set, SET);
2365 else
2366 cost++;
2369 return cost;
2372 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2373 static rtx
2374 produce_memory_decl_rtl (tree obj, int *regno)
2376 rtx x;
2378 gcc_assert (obj);
2379 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2381 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2382 x = gen_rtx_SYMBOL_REF (Pmode, name);
2384 else
2385 x = gen_raw_REG (Pmode, (*regno)++);
2387 return gen_rtx_MEM (DECL_MODE (obj), x);
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  Decls that
   receive an rtl here are pushed onto decl_rtl_to_reset so that the rtl
   can be cleared again afterwards.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      /* Find the base object whose address is taken.  */
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
	   handled_component_p (*expr_p);
	   expr_p = &TREE_OPERAND (*expr_p, 0))
	continue;
      obj = *expr_p;
      if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
	x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      /* Stop the walk below this node (*ws = 0) and give the underlying
	 variable a fake pseudo register.  */
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
	break;

      /* BLKmode objects must live in memory; others get a register.  */
      if (DECL_MODE (obj) == BLKmode)
	x = produce_memory_decl_rtl (obj, regno);
      else
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      /* Remember OBJ so the fake rtl can be reset later.  */
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
2448 /* Determines cost of the computation of EXPR. */
2450 static unsigned
2451 computation_cost (tree expr)
2453 rtx seq, rslt;
2454 tree type = TREE_TYPE (expr);
2455 unsigned cost;
2456 /* Avoid using hard regs in ways which may be unsupported. */
2457 int regno = LAST_VIRTUAL_REGISTER + 1;
2459 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2460 start_sequence ();
2461 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2462 seq = get_insns ();
2463 end_sequence ();
2465 cost = seq_cost (seq);
2466 if (MEM_P (rslt))
2467 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2469 return cost;
2472 /* Returns variable containing the value of candidate CAND at statement AT. */
2474 static tree
2475 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2477 if (stmt_after_increment (loop, cand, stmt))
2478 return cand->var_after;
2479 else
2480 return cand->var_before;
2483 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2484 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2487 tree_int_cst_sign_bit (tree t)
2489 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2490 unsigned HOST_WIDE_INT w;
2492 if (bitno < HOST_BITS_PER_WIDE_INT)
2493 w = TREE_INT_CST_LOW (t);
2494 else
2496 w = TREE_INT_CST_HIGH (t);
2497 bitno -= HOST_BITS_PER_WIDE_INT;
2500 return (w >> bitno) & 1;
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  /* All intermediate results are sign-extended to the precision of TOP.  */
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      /* TOP = X * C: recurse on X and scale the result by C.  */
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
			      precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* TOP = X +- Y: both operands must be multiples of BOT.  */
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      /* Both constants: TOP must divide evenly by BOT.  */
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
	return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
			      precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
2568 /* Folds EXPR using the affine expressions framework. */
2570 static tree
2571 fold_affine_expr (tree expr)
2573 tree type = TREE_TYPE (expr);
2574 struct affine_tree_combination comb;
2576 if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
2577 return expr;
2579 tree_to_aff_combination (expr, type, &comb);
2580 return aff_combination_to_tree (&comb);
2583 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2584 same precision that is at least as wide as the precision of TYPE, stores
2585 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2586 type of A and B. */
2588 static tree
2589 determine_common_wider_type (tree *a, tree *b)
2591 tree wider_type = NULL;
2592 tree suba, subb;
2593 tree atype = TREE_TYPE (*a);
2595 if ((TREE_CODE (*a) == NOP_EXPR
2596 || TREE_CODE (*a) == CONVERT_EXPR))
2598 suba = TREE_OPERAND (*a, 0);
2599 wider_type = TREE_TYPE (suba);
2600 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2601 return atype;
2603 else
2604 return atype;
2606 if ((TREE_CODE (*b) == NOP_EXPR
2607 || TREE_CODE (*b) == CONVERT_EXPR))
2609 subb = TREE_OPERAND (*b, 0);
2610 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2611 return atype;
2613 else
2614 return atype;
2616 *a = suba;
2617 *b = subb;
2618 return wider_type;
2621 /* Determines the expression by that USE is expressed from induction variable
2622 CAND at statement AT in LOOP. The expression is stored in a decomposed
2623 form into AFF. Returns false if USE cannot be expressed using CAND. */
2625 static bool
2626 get_computation_aff (struct loop *loop,
2627 struct iv_use *use, struct iv_cand *cand, tree at,
2628 struct affine_tree_combination *aff)
2630 tree ubase = use->iv->base;
2631 tree ustep = use->iv->step;
2632 tree cbase = cand->iv->base;
2633 tree cstep = cand->iv->step, cstep_common;
2634 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2635 tree common_type, var;
2636 tree uutype;
2637 aff_tree cbase_aff, var_aff;
2638 double_int rat;
2640 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2642 /* We do not have a precision to express the values of use. */
2643 return false;
2646 var = var_at_stmt (loop, cand, at);
/* All arithmetic below is done in the unsigned variant of the use type,
   so that overflow is well-defined.  */
2647 uutype = unsigned_type_for (utype);
2649 /* If the conversion is not noop, perform it. */
2650 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2652 cstep = fold_convert (uutype, cstep);
2653 cbase = fold_convert (uutype, cbase);
2654 var = fold_convert (uutype, var);
/* The use step must be a constant multiple RAT of the candidate step,
   otherwise USE cannot be expressed from CAND.  */
2657 if (!constant_multiple_of (ustep, cstep, &rat))
2658 return false;
2660 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2661 type, we achieve better folding by computing their difference in this
2662 wider type, and cast the result to UUTYPE. We do not need to worry about
2663 overflows, as all the arithmetics will in the end be performed in UUTYPE
2664 anyway. */
2665 common_type = determine_common_wider_type (&ubase, &cbase);
2667 /* use = ubase - ratio * cbase + ratio * var. */
2668 tree_to_aff_combination (ubase, common_type, aff);
2669 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2670 tree_to_aff_combination (var, uutype, &var_aff);
2672 /* We need to shift the value if we are after the increment. */
2673 if (stmt_after_increment (loop, cand, at))
2675 aff_tree cstep_aff;
2677 if (common_type != uutype)
2678 cstep_common = fold_convert (common_type, cstep);
2679 else
2680 cstep_common = cstep;
/* Folding one step into CBASE compensates for VAR already holding the
   post-increment value.  */
2682 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2683 aff_combination_add (&cbase_aff, &cstep_aff);
2686 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2687 aff_combination_add (aff, &cbase_aff);
2688 if (common_type != uutype)
2689 aff_combination_convert (aff, uutype);
2691 aff_combination_scale (&var_aff, rat);
2692 aff_combination_add (aff, &var_aff);
2694 return true;
2697 /* Determines the expression by that USE is expressed from induction variable
2698 CAND at statement AT in LOOP. The computation is unshared. */
2700 static tree
2701 get_computation_at (struct loop *loop,
2702 struct iv_use *use, struct iv_cand *cand, tree at)
2704 aff_tree aff;
2705 tree type = TREE_TYPE (use->iv->base);
2707 if (!get_computation_aff (loop, use, cand, at, &aff))
2708 return NULL_TREE;
2709 unshare_aff_combination (&aff);
2710 return fold_convert (type, aff_combination_to_tree (&aff));
2713 /* Determines the expression by that USE is expressed from induction variable
2714 CAND in LOOP. The computation is unshared. */
2716 static tree
2717 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2719 return get_computation_at (loop, use, cand, use->stmt);
2722 /* Returns cost of addition in MODE. */
2724 static unsigned
2725 add_cost (enum machine_mode mode)
2727 static unsigned costs[NUM_MACHINE_MODES];
2728 rtx seq;
2729 unsigned cost;
2731 if (costs[mode])
2732 return costs[mode];
2734 start_sequence ();
2735 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2736 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2737 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2738 NULL_RTX);
2739 seq = get_insns ();
2740 end_sequence ();
2742 cost = seq_cost (seq);
2743 if (!cost)
2744 cost = 1;
2746 costs[mode] = cost;
2748 if (dump_file && (dump_flags & TDF_DETAILS))
2749 fprintf (dump_file, "Addition in %s costs %d\n",
2750 GET_MODE_NAME (mode), cost);
2751 return cost;
2754 /* Entry in a hashtable of already known costs for multiplication. */
/* Keyed by the (cst, mode) pair; hashed/compared by mbc_entry_hash and
   mbc_entry_eq.  */
2755 struct mbc_entry
2757 HOST_WIDE_INT cst; /* The constant to multiply by. */
2758 enum machine_mode mode; /* In mode. */
2759 unsigned cost; /* The cost. */
2762 /* Counts hash value for the ENTRY. */
2764 static hashval_t
2765 mbc_entry_hash (const void *entry)
2767 const struct mbc_entry *e = entry;
2769 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2772 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2774 static int
2775 mbc_entry_eq (const void *entry1, const void *entry2)
2777 const struct mbc_entry *e1 = entry1;
2778 const struct mbc_entry *e2 = entry2;
2780 return (e1->mode == e2->mode
2781 && e1->cst == e2->cst);
2784 /* Returns cost of multiplication by constant CST in MODE. */
2786 unsigned
2787 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2789 static htab_t costs;
2790 struct mbc_entry **cached, act;
2791 rtx seq;
2792 unsigned cost;
/* Lazily create the (cst, mode) -> cost memoization table; entries are
   freed by the table itself on destruction.  */
2794 if (!costs)
2795 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2797 act.mode = mode;
2798 act.cst = cst;
2799 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2800 if (*cached)
2801 return (*cached)->cost;
/* Not seen before: reserve a cache entry, then measure the cost by
   expanding the multiplication into an insn sequence.  */
2803 *cached = XNEW (struct mbc_entry);
2804 (*cached)->mode = mode;
2805 (*cached)->cst = cst;
2807 start_sequence ();
2808 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2809 gen_int_mode (cst, mode), NULL_RTX, 0);
2810 seq = get_insns ();
2811 end_sequence ();
2813 cost = seq_cost (seq);
2815 if (dump_file && (dump_flags & TDF_DETAILS))
2816 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2817 (int) cst, GET_MODE_NAME (mode), cost);
2819 (*cached)->cost = cost;
2821 return cost;
2824 /* Returns true if multiplying by RATIO is allowed in an address. Test the
2825 validity for a memory reference accessing memory of mode MODE. */
2827 bool
2828 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2830 #define MAX_RATIO 128
2831 static sbitmap valid_mult[MAX_MACHINE_MODE];
/* On first use for MODE, probe every ratio in [-MAX_RATIO, MAX_RATIO]
   against the target's legitimate-address predicate and cache the
   answers in a bitmap (bit i+MAX_RATIO set iff ratio i is valid).  */
2833 if (!valid_mult[mode])
2835 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2836 rtx addr;
2837 HOST_WIDE_INT i;
2839 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2840 sbitmap_zero (valid_mult[mode]);
/* The multiplier operand is patched into this shared MULT rtx on each
   iteration.  */
2841 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2842 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2844 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2845 if (memory_address_p (mode, addr))
2846 SET_BIT (valid_mult[mode], i + MAX_RATIO);
2849 if (dump_file && (dump_flags & TDF_DETAILS))
2851 fprintf (dump_file, " allowed multipliers:");
2852 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2853 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2854 fprintf (dump_file, " %d", (int) i);
2855 fprintf (dump_file, "\n");
2856 fprintf (dump_file, "\n");
/* Ratios beyond the probed range are conservatively rejected.  */
2860 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2861 return false;
2863 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2866 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2867 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2868 variable is omitted. Compute the cost for a memory reference that accesses
2869 a memory location of mode MEM_MODE.
2871 TODO -- there must be some better way. This all is quite crude. */
2873 static unsigned
2874 get_address_cost (bool symbol_present, bool var_present,
2875 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2876 enum machine_mode mem_mode)
2878 static bool initialized[MAX_MACHINE_MODE];
2879 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2880 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2881 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2882 unsigned cost, acost;
2883 bool offset_p, ratio_p;
2884 HOST_WIDE_INT s_offset;
2885 unsigned HOST_WIDE_INT mask;
2886 unsigned bits;
/* One-time per-mode probing: determine the valid offset range, a sample
   valid multiplier, and the cost of each of the 16 addressing shapes
   (every combination of symbol/var/offset/ratio).  */
2888 if (!initialized[mem_mode])
2890 HOST_WIDE_INT i;
2891 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2892 int old_cse_not_expected;
2893 unsigned sym_p, var_p, off_p, rat_p, add_c;
2894 rtx seq, addr, base;
2895 rtx reg0, reg1;
2897 initialized[mem_mode] = true;
2899 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
/* Binary-style probe for the largest positive offset accepted by the
   target's addressing modes (powers of two up to 1 << 20).  */
2901 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2902 for (i = start; i <= 1 << 20; i <<= 1)
2904 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2905 if (!memory_address_p (mem_mode, addr))
2906 break;
2908 max_offset[mem_mode] = i == start ? 0 : i >> 1;
2909 off[mem_mode] = max_offset[mem_mode];
/* Likewise for the most negative offset.  */
2911 for (i = start; i <= 1 << 20; i <<= 1)
2913 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2914 if (!memory_address_p (mem_mode, addr))
2915 break;
2917 min_offset[mem_mode] = i == start ? 0 : -(i >> 1)
2919 if (dump_file && (dump_flags & TDF_DETAILS))
2921 fprintf (dump_file, "get_address_cost:\n");
2922 fprintf (dump_file, " min offset %s %d\n",
2923 GET_MODE_NAME (mem_mode),
2924 (int) min_offset[mem_mode]);
2925 fprintf (dump_file, " max offset %s %d\n",
2926 GET_MODE_NAME (mem_mode),
2927 (int) max_offset[mem_mode]);
/* Pick the smallest multiplier (> 1) the target allows, used as the
   representative ratio when costing RATIO * index shapes.  */
2930 rat[mem_mode] = 1;
2931 for (i = 2; i <= MAX_RATIO; i++)
2932 if (multiplier_allowed_in_address_p (i, mem_mode))
2934 rat[mem_mode] = i;
2935 break;
2938 /* Compute the cost of various addressing modes. */
2939 acost = 0;
2940 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2941 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
/* I's four low bits encode which of symbol/var/offset/ratio take part
   in the address being costed.  */
2943 for (i = 0; i < 16; i++)
2945 sym_p = i & 1;
2946 var_p = (i >> 1) & 1;
2947 off_p = (i >> 2) & 1;
2948 rat_p = (i >> 3) & 1;
2950 addr = reg0;
2951 if (rat_p)
2952 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
2953 gen_int_mode (rat[mem_mode], Pmode));
2955 if (var_p)
2956 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2958 if (sym_p)
2960 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2961 if (off_p)
2962 base = gen_rtx_fmt_e (CONST, Pmode,
2963 gen_rtx_fmt_ee (PLUS, Pmode,
2964 base,
2965 gen_int_mode (off[mem_mode],
2966 Pmode)));
2968 else if (off_p)
2969 base = gen_int_mode (off[mem_mode], Pmode);
2970 else
2971 base = NULL_RTX;
2973 if (base)
2974 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2976 start_sequence ();
2977 /* To avoid splitting addressing modes, pretend that no cse will
2978 follow. */
2979 old_cse_not_expected = cse_not_expected;
2980 cse_not_expected = true;
2981 addr = memory_address (mem_mode, addr);
2982 cse_not_expected = old_cse_not_expected;
2983 seq = get_insns ();
2984 end_sequence ();
/* The cost is the fixup insns memory_address had to emit plus the
   target's cost of the final address; clamp to at least 1.  */
2986 acost = seq_cost (seq);
2987 acost += address_cost (addr, mem_mode);
2989 if (!acost)
2990 acost = 1;
2991 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
2994 /* On some targets, it is quite expensive to load symbol to a register,
2995 which makes addresses that contain symbols look much more expensive.
2996 However, the symbol will have to be loaded in any case before the
2997 loop (and quite likely we have it in register already), so it does not
2998 make much sense to penalize them too heavily. So make some final
2999 tweaks for the SYMBOL_PRESENT modes:
3001 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3002 var is cheaper, use this mode with small penalty.
3003 If VAR_PRESENT is true, try whether the mode with
3004 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3005 if this is the case, use it. */
3006 add_c = add_cost (Pmode);
3007 for (i = 0; i < 8; i++)
3009 var_p = i & 1;
3010 off_p = (i >> 1) & 1;
3011 rat_p = (i >> 2) & 1;
3013 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3014 if (var_p)
3015 acost += add_c;
3017 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3018 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3021 if (dump_file && (dump_flags & TDF_DETAILS))
3023 fprintf (dump_file, "Address costs:\n");
3025 for (i = 0; i < 16; i++)
3027 sym_p = i & 1;
3028 var_p = (i >> 1) & 1;
3029 off_p = (i >> 2) & 1;
3030 rat_p = (i >> 3) & 1;
3032 fprintf (dump_file, " ");
3033 if (sym_p)
3034 fprintf (dump_file, "sym + ");
3035 if (var_p)
3036 fprintf (dump_file, "var + ");
3037 if (off_p)
3038 fprintf (dump_file, "cst + ");
3039 if (rat_p)
3040 fprintf (dump_file, "rat * ");
3042 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3043 fprintf (dump_file, "index costs %d\n", acost);
3045 fprintf (dump_file, "\n");
/* Sign-extend OFFSET from the width of Pmode so the range comparison
   below works on the signed value.  */
3049 bits = GET_MODE_BITSIZE (Pmode);
3050 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3051 offset &= mask;
3052 if ((offset >> (bits - 1) & 1))
3053 offset |= ~mask;
3054 s_offset = offset;
3056 cost = 0;
3057 offset_p = (s_offset != 0
3058 && min_offset[mem_mode] <= s_offset
3059 && s_offset <= max_offset[mem_mode]);
3060 ratio_p = (ratio != 1
3061 && multiplier_allowed_in_address_p (ratio, mem_mode));
/* Pieces that cannot be folded into the address must be computed
   separately: a multiply for a disallowed ratio, an add for an
   out-of-range offset.  */
3063 if (ratio != 1 && !ratio_p)
3064 cost += multiply_by_cost (ratio, Pmode);
3066 if (s_offset && !offset_p && !symbol_present)
3067 cost += add_cost (Pmode);
3069 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3070 return cost + acost;
3073 /* Estimates cost of forcing expression EXPR into a variable. */
3075 unsigned
3076 force_expr_to_var_cost (tree expr)
3078 static bool costs_initialized = false;
3079 static unsigned integer_cost;
3080 static unsigned symbol_cost;
3081 static unsigned address_cost;
3082 tree op0, op1;
3083 unsigned cost0, cost1, cost;
3084 enum machine_mode mode;
/* On first call, calibrate the base costs of materializing an integer
   constant, a symbol address, and symbol + offset, by expanding sample
   expressions and measuring them.  */
3086 if (!costs_initialized)
3088 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3089 rtx x = gen_rtx_MEM (DECL_MODE (var),
3090 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3091 tree addr;
3092 tree type = build_pointer_type (integer_type_node);
3094 integer_cost = computation_cost (build_int_cst (integer_type_node,
3095 2000));
3097 SET_DECL_RTL (var, x);
3098 TREE_STATIC (var) = 1;
3099 addr = build1 (ADDR_EXPR, type, var);
/* The +1 slightly biases against symbols/addresses vs. plain
   integer constants.  */
3100 symbol_cost = computation_cost (addr) + 1;
3102 address_cost
3103 = computation_cost (build2 (PLUS_EXPR, type,
3104 addr,
3105 build_int_cst (type, 2000))) + 1;
3106 if (dump_file && (dump_flags & TDF_DETAILS))
3108 fprintf (dump_file, "force_expr_to_var_cost:\n");
3109 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3110 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3111 fprintf (dump_file, " address %d\n", (int) address_cost);
3112 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3113 fprintf (dump_file, "\n");
3116 costs_initialized = true;
3119 STRIP_NOPS (expr);
/* SSA names and variables are already "in a variable" -- free.  */
3121 if (SSA_VAR_P (expr))
3122 return 0;
3124 if (TREE_INVARIANT (expr))
3126 if (TREE_CODE (expr) == INTEGER_CST)
3127 return integer_cost;
3129 if (TREE_CODE (expr) == ADDR_EXPR)
3131 tree obj = TREE_OPERAND (expr, 0);
3133 if (TREE_CODE (obj) == VAR_DECL
3134 || TREE_CODE (obj) == PARM_DECL
3135 || TREE_CODE (obj) == RESULT_DECL)
3136 return symbol_cost;
3139 return address_cost;
/* Recursively cost the operands of simple arithmetic; anything else is
   charged the generic spill cost.  */
3142 switch (TREE_CODE (expr))
3144 case PLUS_EXPR:
3145 case MINUS_EXPR:
3146 case MULT_EXPR:
3147 op0 = TREE_OPERAND (expr, 0);
3148 op1 = TREE_OPERAND (expr, 1);
3149 STRIP_NOPS (op0);
3150 STRIP_NOPS (op1);
3152 if (is_gimple_val (op0))
3153 cost0 = 0;
3154 else
3155 cost0 = force_expr_to_var_cost (op0);
3157 if (is_gimple_val (op1))
3158 cost1 = 0;
3159 else
3160 cost1 = force_expr_to_var_cost (op1);
3162 break;
3164 default:
3165 /* Just an arbitrary value, FIXME. */
3166 return target_spill_cost;
/* Add the cost of the operation itself on top of the operand costs.  */
3169 mode = TYPE_MODE (TREE_TYPE (expr));
3170 switch (TREE_CODE (expr))
3172 case PLUS_EXPR:
3173 case MINUS_EXPR:
3174 cost = add_cost (mode);
3175 break;
3177 case MULT_EXPR:
3178 if (cst_and_fits_in_hwi (op0))
3179 cost = multiply_by_cost (int_cst_value (op0), mode);
3180 else if (cst_and_fits_in_hwi (op1))
3181 cost = multiply_by_cost (int_cst_value (op1), mode);
3182 else
3183 return target_spill_cost;
3184 break;
3186 default:
3187 gcc_unreachable ();
3190 cost += cost0;
3191 cost += cost1;
3193 /* Bound the cost by target_spill_cost. The parts of complicated
3194 computations often are either loop invariant or at least can
3195 be shared between several iv uses, so letting this grow without
3196 limits would not give reasonable results. */
3197 return cost < target_spill_cost ? cost : target_spill_cost;
3200 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3201 invariants the computation depends on. */
3203 static unsigned
3204 force_var_cost (struct ivopts_data *data,
3205 tree expr, bitmap *depends_on)
3207 if (depends_on)
3209 fd_ivopts_data = data;
3210 walk_tree (&expr, find_depends, depends_on, NULL);
3213 return force_expr_to_var_cost (expr);
3216 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3217 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3218 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3219 invariants the computation depends on. */
3221 static unsigned
3222 split_address_cost (struct ivopts_data *data,
3223 tree addr, bool *symbol_present, bool *var_present,
3224 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3226 tree core;
3227 HOST_WIDE_INT bitsize;
3228 HOST_WIDE_INT bitpos;
3229 tree toffset;
3230 enum machine_mode mode;
3231 int unsignedp, volatilep;
3233 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3234 &unsignedp, &volatilep, false);
/* A variable inner offset, a non-byte-aligned position, or a base that
   is not a plain VAR_DECL cannot be split; charge the generic spill
   cost and record the dependencies.  */
3236 if (toffset != 0
3237 || bitpos % BITS_PER_UNIT != 0
3238 || TREE_CODE (core) != VAR_DECL)
3240 *symbol_present = false;
3241 *var_present = true;
3242 fd_ivopts_data = data;
3243 walk_tree (&addr, find_depends, depends_on, NULL);
3244 return target_spill_cost;
3247 *offset += bitpos / BITS_PER_UNIT;
/* Statically-allocated objects have a symbolic address.  */
3248 if (TREE_STATIC (core)
3249 || DECL_EXTERNAL (core))
3251 *symbol_present = true;
3252 *var_present = false;
3253 return 0;
/* Otherwise the base is an automatic variable.  */
3256 *symbol_present = false;
3257 *var_present = true;
3258 return 0;
3261 /* Estimates cost of expressing difference of addresses E1 - E2 as
3262 var + symbol + offset. The value of offset is added to OFFSET,
3263 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3264 part is missing. DEPENDS_ON is a set of the invariants the computation
3265 depends on. */
3267 static unsigned
3268 ptr_difference_cost (struct ivopts_data *data,
3269 tree e1, tree e2, bool *symbol_present, bool *var_present,
3270 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3272 HOST_WIDE_INT diff = 0;
3273 unsigned cost;
3275 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
/* Best case: the difference folds to a compile-time constant.  */
3277 if (ptr_difference_const (e1, e2, &diff))
3279 *offset += diff;
3280 *symbol_present = false;
3281 *var_present = false;
3282 return 0;
/* E1 - 0: split E1's address into its symbol/var/offset parts.  */
3285 if (e2 == integer_zero_node)
3286 return split_address_cost (data, TREE_OPERAND (e1, 0),
3287 symbol_present, var_present, offset, depends_on);
/* General case: both addresses must be forced into variables and
   subtracted at runtime.  */
3289 *symbol_present = false;
3290 *var_present = true;
3292 cost = force_var_cost (data, e1, depends_on);
3293 cost += force_var_cost (data, e2, depends_on);
3294 cost += add_cost (Pmode);
3296 return cost;
3299 /* Estimates cost of expressing difference E1 - E2 as
3300 var + symbol + offset. The value of offset is added to OFFSET,
3301 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3302 part is missing. DEPENDS_ON is a set of the invariants the computation
3303 depends on. */
3305 static unsigned
3306 difference_cost (struct ivopts_data *data,
3307 tree e1, tree e2, bool *symbol_present, bool *var_present,
3308 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3310 unsigned cost;
3311 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3312 unsigned HOST_WIDE_INT off1, off2;
/* Peel constant offsets off both operands first; their difference goes
   straight into *OFFSET.  */
3314 e1 = strip_offset (e1, &off1);
3315 e2 = strip_offset (e2, &off2);
3316 *offset += off1 - off2;
3318 STRIP_NOPS (e1);
3319 STRIP_NOPS (e2);
/* Address differences are handled by the pointer-specific routine.  */
3321 if (TREE_CODE (e1) == ADDR_EXPR)
3322 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3323 depends_on);
3324 *symbol_present = false;
3326 if (operand_equal_p (e1, e2, 0))
3328 *var_present = false;
3329 return 0;
3331 *var_present = true;
3332 if (integer_zerop (e2))
3333 return force_var_cost (data, e1, depends_on);
/* 0 - E2 needs a negation, charged as a multiply by -1.  */
3335 if (integer_zerop (e1))
3337 cost = force_var_cost (data, e2, depends_on);
3338 cost += multiply_by_cost (-1, mode);
3340 return cost;
/* General case: force both operands into variables and subtract.  */
3343 cost = force_var_cost (data, e1, depends_on);
3344 cost += force_var_cost (data, e2, depends_on);
3345 cost += add_cost (mode);
3347 return cost;
3350 /* Determines the cost of the computation by that USE is expressed
3351 from induction variable CAND. If ADDRESS_P is true, we just need
3352 to create an address from it, otherwise we want to get it into
3353 register. A set of invariants we depend on is stored in
3354 DEPENDS_ON. AT is the statement at that the value is computed. */
3356 static unsigned
3357 get_computation_cost_at (struct ivopts_data *data,
3358 struct iv_use *use, struct iv_cand *cand,
3359 bool address_p, bitmap *depends_on, tree at)
3361 tree ubase = use->iv->base, ustep = use->iv->step;
3362 tree cbase, cstep;
3363 tree utype = TREE_TYPE (ubase), ctype;
3364 unsigned HOST_WIDE_INT cstepi, offset = 0;
3365 HOST_WIDE_INT ratio, aratio;
3366 bool var_present, symbol_present;
3367 unsigned cost = 0, n_sums;
3368 double_int rat;
3370 *depends_on = NULL;
3372 /* Only consider real candidates. */
3373 if (!cand->iv)
3374 return INFTY;
3376 cbase = cand->iv->base;
3377 cstep = cand->iv->step;
3378 ctype = TREE_TYPE (cbase);
3380 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3382 /* We do not have a precision to express the values of use. */
3383 return INFTY;
3386 if (address_p)
3388 /* Do not try to express address of an object with computation based
3389 on address of a different object. This may cause problems in rtl
3390 level alias analysis (that does not expect this to be happening,
3391 as this is illegal in C), and would be unlikely to be useful
3392 anyway. */
3393 if (use->iv->base_object
3394 && cand->iv->base_object
3395 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3396 return INFTY;
/* Differing precisions are only handled by the generic (and more
   expensive to evaluate) fallback path below.  */
3399 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3401 /* TODO -- add direct handling of this case. */
3402 goto fallback;
3405 /* CSTEPI is removed from the offset in case statement is after the
3406 increment. If the step is not constant, we use zero instead.
3407 This is a bit imprecise (there is the extra addition), but
3408 redundancy elimination is likely to transform the code so that
3409 it uses value of the variable before increment anyway,
3410 so it is not that much unrealistic. */
3411 if (cst_and_fits_in_hwi (cstep))
3412 cstepi = int_cst_value (cstep);
3413 else
3414 cstepi = 0;
/* The use step must be a constant multiple RAT of the candidate step,
   and the ratio must fit in a signed HOST_WIDE_INT.  */
3416 if (!constant_multiple_of (ustep, cstep, &rat))
3417 return INFTY;
3419 if (double_int_fits_in_shwi_p (rat))
3420 ratio = double_int_to_shwi (rat);
3421 else
3422 return INFTY;
3424 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3425 or ratio == 1, it is better to handle this like
3427 ubase - ratio * cbase + ratio * var
3429 (also holds in the case ratio == -1, TODO. */
3431 if (cst_and_fits_in_hwi (cbase))
3433 offset = - ratio * int_cst_value (cbase);
3434 cost += difference_cost (data,
3435 ubase, integer_zero_node,
3436 &symbol_present, &var_present, &offset,
3437 depends_on);
3439 else if (ratio == 1)
3441 cost += difference_cost (data,
3442 ubase, cbase,
3443 &symbol_present, &var_present, &offset,
3444 depends_on);
3446 else
3448 cost += force_var_cost (data, cbase, depends_on);
3449 cost += add_cost (TYPE_MODE (ctype));
3450 cost += difference_cost (data,
3451 ubase, integer_zero_node,
3452 &symbol_present, &var_present, &offset,
3453 depends_on);
3456 /* If we are after the increment, the value of the candidate is higher by
3457 one iteration. */
3458 if (stmt_after_increment (data->current_loop, cand, at))
3459 offset -= ratio * cstepi;
3461 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3462 (symbol/var/const parts may be omitted). If we are looking for an address,
3463 find the cost of addressing this. */
3464 if (address_p)
3465 return cost + get_address_cost (symbol_present, var_present, offset, ratio,
3466 TYPE_MODE (TREE_TYPE (*use->op_p)));
3468 /* Otherwise estimate the costs for computing the expression. */
3469 aratio = ratio > 0 ? ratio : -ratio;
3470 if (!symbol_present && !var_present && !offset)
3472 if (ratio != 1)
3473 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3475 return cost;
3478 if (aratio != 1)
3479 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
/* Count the additions needed to combine the remaining parts; symbol +
   offset folds at compile time and is not a separate addition.  */
3481 n_sums = 1;
3482 if (var_present
3483 /* Symbol + offset should be compile-time computable. */
3484 && (symbol_present || offset))
3485 n_sums++;
3487 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3489 fallback:
3491 /* Just get the expression, expand it and measure the cost. */
3492 tree comp = get_computation_at (data->current_loop, use, cand, at);
3494 if (!comp)
3495 return INFTY;
3497 if (address_p)
3498 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3500 return computation_cost (comp);
3504 /* Determines the cost of the computation by that USE is expressed
3505 from induction variable CAND. If ADDRESS_P is true, we just need
3506 to create an address from it, otherwise we want to get it into
3507 register. A set of invariants we depend on is stored in
3508 DEPENDS_ON. */
3510 static unsigned
3511 get_computation_cost (struct ivopts_data *data,
3512 struct iv_use *use, struct iv_cand *cand,
3513 bool address_p, bitmap *depends_on)
3515 return get_computation_cost_at (data,
3516 use, cand, address_p, depends_on, use->stmt);
3519 /* Determines cost of basing replacement of USE on CAND in a generic
3520 expression. */
3522 static bool
3523 determine_use_iv_cost_generic (struct ivopts_data *data,
3524 struct iv_use *use, struct iv_cand *cand)
3526 bitmap depends_on;
3527 unsigned cost;
3529 /* The simple case first -- if we need to express value of the preserved
3530 original biv, the cost is 0. This also prevents us from counting the
3531 cost of increment twice -- once at this use and once in the cost of
3532 the candidate. */
3533 if (cand->pos == IP_ORIGINAL
3534 && cand->incremented_at == use->stmt)
3536 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3537 return true;
3540 cost = get_computation_cost (data, use, cand, false, &depends_on);
3541 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3543 return cost != INFTY;
3546 /* Determines cost of basing replacement of USE on CAND in an address. */
3548 static bool
3549 determine_use_iv_cost_address (struct ivopts_data *data,
3550 struct iv_use *use, struct iv_cand *cand)
3552 bitmap depends_on;
3553 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3555 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3557 return cost != INFTY;
3560 /* Computes value of induction variable IV in iteration NITER. */
3562 static tree
3563 iv_value (struct iv *iv, tree niter)
3565 tree val;
3566 tree type = TREE_TYPE (iv->base);
3568 niter = fold_convert (type, niter);
3569 val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3571 return fold_build2 (PLUS_EXPR, type, iv->base, val);
3574 /* Computes value of candidate CAND at position AT in iteration NITER. */
3576 static tree
3577 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3579 tree val = iv_value (cand->iv, niter);
3580 tree type = TREE_TYPE (cand->iv->base);
3582 if (stmt_after_increment (loop, cand, at))
3583 val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3585 return val;
3588 /* Returns period of induction variable iv. */
3590 static tree
3591 iv_period (struct iv *iv)
3593 tree step = iv->step, period, type;
3594 tree pow2div;
3596 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3598 /* Period of the iv is gcd (step, type range). Since type range is power
3599 of two, it suffices to determine the maximum power of two that divides
3600 step. */
3601 pow2div = num_ending_zeros (step);
3602 type = unsigned_type_for (TREE_TYPE (step));
3604 period = build_low_bits_mask (type,
3605 (TYPE_PRECISION (type)
3606 - tree_low_cst (pow2div, 1)));
3608 return period;
3611 /* Returns the comparison operator used when eliminating the iv USE. */
3613 static enum tree_code
3614 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3616 struct loop *loop = data->current_loop;
3617 basic_block ex_bb;
3618 edge exit;
3620 ex_bb = bb_for_stmt (use->stmt);
3621 exit = EDGE_SUCC (ex_bb, 0);
3622 if (flow_bb_inside_loop_p (loop, exit->dest))
3623 exit = EDGE_SUCC (ex_bb, 1);
3625 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3628 /* Check whether it is possible to express the condition in USE by comparison
3629 of candidate CAND. If so, store the value compared with to BOUND. */
3631 static bool
3632 may_eliminate_iv (struct ivopts_data *data,
3633 struct iv_use *use, struct iv_cand *cand, tree *bound)
3635 basic_block ex_bb;
3636 edge exit;
3637 tree nit, nit_type;
3638 tree wider_type, period, per_type;
3639 struct loop *loop = data->current_loop;
/* The reasoning below requires a compile-time-constant step.  */
3641 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3642 return false;
3644 /* For now works only for exits that dominate the loop latch. TODO -- extend
3645 for other conditions inside loop body. */
3646 ex_bb = bb_for_stmt (use->stmt);
3647 if (use->stmt != last_stmt (ex_bb)
3648 || TREE_CODE (use->stmt) != COND_EXPR)
3649 return false;
3650 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb)
3651 return false;
/* Find the successor edge that leaves the loop; if neither does, this
   is not a loop exit at all.  */
3653 exit = EDGE_SUCC (ex_bb, 0);
3654 if (flow_bb_inside_loop_p (loop, exit->dest))
3655 exit = EDGE_SUCC (ex_bb, 1);
3656 if (flow_bb_inside_loop_p (loop, exit->dest))
3657 return false;
/* We need a known number of iterations for this exit.  */
3659 nit = niter_for_exit (data, exit);
3660 if (!nit)
3661 return false;
3663 nit_type = TREE_TYPE (nit);
3665 /* Determine whether we may use the variable to test whether niter iterations
3666 elapsed. This is the case iff the period of the induction variable is
3667 greater than the number of iterations. */
3668 period = iv_period (cand->iv);
3669 if (!period)
3670 return false;
3671 per_type = TREE_TYPE (period);
/* Compare period and niter in whichever of their types is wider.  */
3673 wider_type = TREE_TYPE (period);
3674 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3675 wider_type = per_type;
3676 else
3677 wider_type = nit_type;
3679 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
3680 fold_convert (wider_type, period),
3681 fold_convert (wider_type, nit))))
3682 return false;
/* The bound is the candidate's value at the exit test in the last
   iteration.  */
3684 *bound = fold_affine_expr (cand_value_at (loop, cand, use->stmt, nit));
3685 return true;
/* Determines cost of basing replacement of USE on CAND in a condition.
   Records the result via set_use_iv_cost and returns true iff the cost
   is finite.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
				 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE, op, cond;
  bitmap depends_on = NULL;
  unsigned cost;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
      return false;
    }

  /* Preferred case: replace the exit test by a comparison of CAND against
     BOUND; the cost is then just the cost of materializing BOUND.  */
  if (may_eliminate_iv (data, use, cand, &bound))
    {
      cost = force_var_cost (data, bound, &depends_on);

      set_use_iv_cost (data, use, cand, cost, depends_on, bound);
      return cost != INFTY;
    }

  /* The induction variable elimination failed; just express the original
     giv.  If it is compared with an invariant, note that we cannot get
     rid of it.  */
  cost = get_computation_cost (data, use, cand, false, &depends_on);

  cond = *use->op_p;
  if (TREE_CODE (cond) != SSA_NAME)
    {
      /* Find the non-iv operand of the comparison (the invariant side).  */
      op = TREE_OPERAND (cond, 0);
      if (TREE_CODE (op) == SSA_NAME
	  && !integer_zerop (get_iv (data, op)->step))
	op = TREE_OPERAND (cond, 1);
      if (TREE_CODE (op) == SSA_NAME)
	{
	  /* Record the invariants its base depends on, so they are kept
	     live as long as this use is expressed through CAND.  */
	  op = get_iv (data, op)->base;
	  fd_ivopts_data = data;
	  walk_tree (&op, find_depends, &depends_on, NULL);
	}
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
  return cost != INFTY;
}
3737 /* Determines cost of basing replacement of USE on CAND. Returns false
3738 if USE cannot be based on CAND. */
3740 static bool
3741 determine_use_iv_cost (struct ivopts_data *data,
3742 struct iv_use *use, struct iv_cand *cand)
3744 switch (use->type)
3746 case USE_NONLINEAR_EXPR:
3747 return determine_use_iv_cost_generic (data, use, cand);
3749 case USE_ADDRESS:
3750 return determine_use_iv_cost_address (data, use, cand);
3752 case USE_COMPARE:
3753 return determine_use_iv_cost_condition (data, use, cand);
3755 default:
3756 gcc_unreachable ();
/* Determines costs of basing the use of the iv on an iv candidate, for
   every (use, candidate) pair that is considered.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
	{
	  /* Cost every candidate for this use.  */
	  for (j = 0; j < n_iv_cands (data); j++)
	    {
	      cand = iv_cand (data, j);
	      determine_use_iv_cost (data, use, cand);
	    }
	}
      else
	{
	  bitmap_iterator bi;

	  /* Only cost the candidates recorded as related to this use.  */
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      cand = iv_cand (data, j);
	      if (!determine_use_iv_cost (data, use, cand))
		bitmap_set_bit (to_clear, j);
	    }

	  /* Remove the candidates for that the cost is infinite from
	     the list of related candidates.  */
	  bitmap_and_compl_into (use->related_cands, to_clear);
	  bitmap_clear (to_clear);
	}
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);

	  fprintf (dump_file, "Use %d:\n", i);
	  fprintf (dump_file, "  cand\tcost\tdepends on\n");
	  for (j = 0; j < use->n_map_members; j++)
	    {
	      /* Skip empty map slots and infinite costs.  */
	      if (!use->cost_map[j].cand
		  || use->cost_map[j].cost == INFTY)
		continue;

	      fprintf (dump_file, "  %d\t%d\t",
		       use->cost_map[j].cand->id,
		       use->cost_map[j].cost);
	      if (use->cost_map[j].depends_on)
		bitmap_print (dump_file,
			      use->cost_map[j].depends_on, "","");
	      fprintf (dump_file, "\n");
	    }

	  fprintf (dump_file, "\n");
	}
      fprintf (dump_file, "\n");
    }
}
3835 /* Determines cost of the candidate CAND. */
3837 static void
3838 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3840 unsigned cost_base, cost_step;
3841 tree base;
3843 if (!cand->iv)
3845 cand->cost = 0;
3846 return;
3849 /* There are two costs associated with the candidate -- its increment
3850 and its initialization. The second is almost negligible for any loop
3851 that rolls enough, so we take it just very little into account. */
3853 base = cand->iv->base;
3854 cost_base = force_var_cost (data, base, NULL);
3855 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3857 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3859 /* Prefer the original iv unless we may gain something by replacing it;
3860 this is not really relevant for artificial ivs created by other
3861 passes. */
3862 if (cand->pos == IP_ORIGINAL
3863 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3864 cand->cost--;
3866 /* Prefer not to insert statements into latch unless there are some
3867 already (so that we do not create unnecessary jumps). */
3868 if (cand->pos == IP_END
3869 && empty_block_p (ip_end_pos (data->current_loop)))
3870 cand->cost++;
3873 /* Determines costs of computation of the candidates. */
3875 static void
3876 determine_iv_costs (struct ivopts_data *data)
3878 unsigned i;
3880 if (dump_file && (dump_flags & TDF_DETAILS))
3882 fprintf (dump_file, "Candidate costs:\n");
3883 fprintf (dump_file, " cand\tcost\n");
3886 for (i = 0; i < n_iv_cands (data); i++)
3888 struct iv_cand *cand = iv_cand (data, i);
3890 determine_iv_cost (data, cand);
3892 if (dump_file && (dump_flags & TDF_DETAILS))
3893 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3896 if (dump_file && (dump_flags & TDF_DETAILS))
3897 fprintf (dump_file, "\n");
3900 /* Calculates cost for having SIZE induction variables. */
3902 static unsigned
3903 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3905 return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
/* For each size of the induction variable set determine the penalty.
   Also estimates DATA->regs_used, the register pressure caused by the
   loop itself.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  tree phi, op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  /* We use the following model (definitely improvable, especially the
     cost function -- TODO):

     We estimate the number of registers available (using MD data), name it A.

     We estimate the number of registers used by the loop, name it U.  This
     number is obtained as the number of loop phi nodes (not counting virtual
     registers and bivs) + the number of variables from outside of the loop.

     We set a reserve R (free regs that are used for temporary computations,
     etc.).  For now the reserve is a constant 3.

     Let I be the number of induction variables.

     -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
	make a lot of ivs without a reason).
     -- if A - R < U + I <= A, the cost is I * PRES_COST
     -- if U + I > A, the cost is I * PRES_COST and
	number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
      fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
    }

  /* Count the loop header phis that are neither virtual nor ivs -- these
     represent values live around the loop (registers used by the loop).  */
  n = 0;
  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      op = PHI_RESULT (phi);

      if (!is_gimple_reg (op))
	continue;

      if (get_iv (data, op))
	continue;

      n++;
    }

  /* Plus the invariants from outside the loop that have nonlinear uses.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
	n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
	fprintf (dump_file, "  %d\t%d\n", j,
		 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}
3984 /* Returns true if A is a cheaper cost pair than B. */
3986 static bool
3987 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3989 if (!a)
3990 return false;
3992 if (!b)
3993 return true;
3995 if (a->cost < b->cost)
3996 return true;
3998 if (a->cost > b->cost)
3999 return false;
4001 /* In case the costs are the same, prefer the cheaper candidate. */
4002 if (a->cand->cost < b->cand->cost)
4003 return true;
4005 return false;
4008 /* Computes the cost field of IVS structure. */
4010 static void
4011 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4013 unsigned cost = 0;
4015 cost += ivs->cand_use_cost;
4016 cost += ivs->cand_cost;
4017 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4019 ivs->cost = cost;
4022 /* Remove invariants in set INVS to set IVS. */
4024 static void
4025 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4027 bitmap_iterator bi;
4028 unsigned iid;
4030 if (!invs)
4031 return;
4033 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4035 ivs->n_invariant_uses[iid]--;
4036 if (ivs->n_invariant_uses[iid] == 0)
4037 ivs->n_regs--;
/* Set USE not to be expressed by any candidate in IVS.  Updates all the
   bookkeeping (per-candidate use counts, register count, costs) and
   recomputes the total cost.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
		 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  /* If this was the last use of the candidate, remove the candidate
     itself from the set.  */
  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
	ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost -= cp->cost;

  iv_ca_set_remove_invariants (ivs, cp->depends_on);
  iv_ca_recount_cost (data, ivs);
}
4077 /* Add invariants in set INVS to set IVS. */
4079 static void
4080 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4082 bitmap_iterator bi;
4083 unsigned iid;
4085 if (!invs)
4086 return;
4088 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4090 ivs->n_invariant_uses[iid]++;
4091 if (ivs->n_invariant_uses[iid] == 1)
4092 ivs->n_regs++;
/* Set cost pair for USE in set IVS to CP.  CP may be NULL, in which case
   USE ends up unexpressed.  Mirrors iv_ca_set_no_cp: removes the old
   assignment first, then installs the new one and updates all counters.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      /* First use of this candidate brings the candidate itself into
	 the set.  */
      if (ivs->n_cand_uses[cid] == 1)
	{
	  bitmap_set_bit (ivs->cands, cid);
	  /* Do not count the pseudocandidates.  */
	  if (cp->cand->iv)
	    ivs->n_regs++;
	  ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;

	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
	}

      ivs->cand_use_cost += cp->cost;
      iv_ca_set_add_invariants (ivs, cp->depends_on);
      iv_ca_recount_cost (data, ivs);
    }
}
4135 /* Extend set IVS by expressing USE by some of the candidates in it
4136 if possible. */
4138 static void
4139 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4140 struct iv_use *use)
4142 struct cost_pair *best_cp = NULL, *cp;
4143 bitmap_iterator bi;
4144 unsigned i;
4146 gcc_assert (ivs->upto >= use->id);
4148 if (ivs->upto == use->id)
4150 ivs->upto++;
4151 ivs->bad_uses++;
4154 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4156 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4158 if (cheaper_cost_pair (cp, best_cp))
4159 best_cp = cp;
4162 iv_ca_set_cp (data, ivs, use, best_cp);
4165 /* Get cost for assignment IVS. */
4167 static unsigned
4168 iv_ca_cost (struct iv_ca *ivs)
4170 return (ivs->bad_uses ? INFTY : ivs->cost);
4173 /* Returns true if all dependences of CP are among invariants in IVS. */
4175 static bool
4176 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4178 unsigned i;
4179 bitmap_iterator bi;
4181 if (!cp->depends_on)
4182 return true;
4184 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4186 if (ivs->n_invariant_uses[i] == 0)
4187 return false;
4190 return true;
4193 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4194 it before NEXT_CHANGE. */
4196 static struct iv_ca_delta *
4197 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4198 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4200 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4202 change->use = use;
4203 change->old_cp = old_cp;
4204 change->new_cp = new_cp;
4205 change->next_change = next_change;
4207 return change;
4210 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4211 are rewritten. */
4213 static struct iv_ca_delta *
4214 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4216 struct iv_ca_delta *last;
4218 if (!l2)
4219 return l1;
4221 if (!l1)
4222 return l2;
4224 for (last = l1; last->next_change; last = last->next_change)
4225 continue;
4226 last->next_change = l2;
4228 return l1;
4231 /* Returns candidate by that USE is expressed in IVS. */
4233 static struct cost_pair *
4234 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4236 return ivs->cand_for_use[use->id];
4239 /* Reverse the list of changes DELTA, forming the inverse to it. */
4241 static struct iv_ca_delta *
4242 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4244 struct iv_ca_delta *act, *next, *prev = NULL;
4245 struct cost_pair *tmp;
4247 for (act = delta; act; act = next)
4249 next = act->next_change;
4250 act->next_change = prev;
4251 prev = act;
4253 tmp = act->old_cp;
4254 act->old_cp = act->new_cp;
4255 act->new_cp = tmp;
4258 return prev;
4261 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4262 reverted instead. */
4264 static void
4265 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4266 struct iv_ca_delta *delta, bool forward)
4268 struct cost_pair *from, *to;
4269 struct iv_ca_delta *act;
4271 if (!forward)
4272 delta = iv_ca_delta_reverse (delta);
4274 for (act = delta; act; act = act->next_change)
4276 from = act->old_cp;
4277 to = act->new_cp;
4278 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4279 iv_ca_set_cp (data, ivs, act->use, to);
4282 if (!forward)
4283 iv_ca_delta_reverse (delta);
4286 /* Returns true if CAND is used in IVS. */
4288 static bool
4289 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4291 return ivs->n_cand_uses[cand->id] > 0;
4294 /* Returns number of induction variable candidates in the set IVS. */
4296 static unsigned
4297 iv_ca_n_cands (struct iv_ca *ivs)
4299 return ivs->n_cands;
4302 /* Free the list of changes DELTA. */
4304 static void
4305 iv_ca_delta_free (struct iv_ca_delta **delta)
4307 struct iv_ca_delta *act, *next;
4309 for (act = *delta; act; act = next)
4311 next = act->next_change;
4312 free (act);
4315 *delta = NULL;
4318 /* Allocates new iv candidates assignment. */
4320 static struct iv_ca *
4321 iv_ca_new (struct ivopts_data *data)
4323 struct iv_ca *nw = XNEW (struct iv_ca);
4325 nw->upto = 0;
4326 nw->bad_uses = 0;
4327 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4328 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4329 nw->cands = BITMAP_ALLOC (NULL);
4330 nw->n_cands = 0;
4331 nw->n_regs = 0;
4332 nw->cand_use_cost = 0;
4333 nw->cand_cost = 0;
4334 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4335 nw->cost = 0;
4337 return nw;
4340 /* Free memory occupied by the set IVS. */
4342 static void
4343 iv_ca_free (struct iv_ca **ivs)
4345 free ((*ivs)->cand_for_use);
4346 free ((*ivs)->n_cand_uses);
4347 BITMAP_FREE ((*ivs)->cands);
4348 free ((*ivs)->n_invariant_uses);
4349 free (*ivs);
4350 *ivs = NULL;
/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;

  fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
  bitmap_print (file, ivs->cands, "  candidates ","\n");

  /* List the referenced invariants, comma-separated after the first.  */
  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }
  fprintf (file, "\n");
}
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  IVS itself is left unchanged: the
   delta is committed only temporarily to measure the cost, then rolled
   back.  */

static unsigned
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs)
{
  unsigned i, cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      /* Already expressed by CAND -- nothing to change.  */
      if (old_cp
	  && old_cp->cand == cand)
	continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
	continue;

      /* Dependences of the new pair must be satisfied by the set.  */
      if (!iv_ca_has_deps (ivs, new_cp))
	continue;

      if (!cheaper_cost_pair (new_cp, old_cp))
	continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  /* Apply the delta just to measure the cost, then undo it.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  Returns INFTY (and
   frees DELTA) if some use expressed by CAND cannot be re-expressed by
   another candidate in the set.  IVS itself is left unchanged.  */

static unsigned
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  unsigned cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
	continue;

      /* Find the cheapest replacement candidate for this use among the
	 remaining members of the set.  */
      new_cp = NULL;

      if (data->consider_all_candidates)
	{
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}
      else
	{
	  /* Same scan, restricted to candidates related to the use.  */
	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}

      /* No replacement found -- CAND cannot be removed.  */
      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return INFTY;
	}

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  /* Apply the delta just to measure the cost, then undo it.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try optimizing the set of candidates IVS by removing candidates different
   from to EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  Recurses while removals keep improving the cost;
   DELTA accumulates the whole chain of removals.  */

static unsigned
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i, best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  /* Try removing each candidate (except EXCEPT_CAND) and keep the best.  */
  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  The best delta is
     committed while recursing and rolled back afterwards, so IVS is not
     modified; the joined delta describes the full chain of removals.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
/* Tries to extend the sets IVS in the best possible way in order
   to express the USE.  Returns false if no candidate with finite cost
   could be found for USE.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_use *use)
{
  unsigned best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  /* Baseline: let iv_ca_add_use pick the cheapest candidate already in
     the set (possibly none).  */
  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      /* Remember the baseline as a delta and detach it again, so all
	 alternatives below are compared from the same starting state.  */
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* First try important candidates.  Only if it fails, try the specific ones.
     Rationale -- in loops with many variables the best choice often is to use
     just one generic biv.  If we added here many ivs specific to the uses,
     the optimization algorithm later would be likely to get stuck in a local
     minimum, thus causing us to create too many ivs.  The approach from
     few ivs to more seems more likely to be successful -- starting from few
     ivs, replacing an expensive use by a specific iv should always be a
     win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
	continue;

      /* Measure the cost of using CAND for this use, letting the other
	 uses migrate to CAND where that is cheaper.  */
      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (act_cost < best_cost)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (best_cost == INFTY)
    {
      /* No important candidate worked; fall back to all candidates costed
	 for this particular use.  */
      for (i = 0; i < use->n_map_members; i++)
	{
	  cp = use->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important)
	    continue;

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, use, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
	  iv_ca_set_no_cp (data, ivs, use);
	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
				       cp, act_delta);

	  if (act_cost < best_cost)
	    {
	      best_cost = act_cost;

	      if (best_delta)
		iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  /* Commit the winning alternative for real.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return (best_cost != INFTY);
}
4652 /* Finds an initial assignment of candidates to uses. */
4654 static struct iv_ca *
4655 get_initial_solution (struct ivopts_data *data)
4657 struct iv_ca *ivs = iv_ca_new (data);
4658 unsigned i;
4660 for (i = 0; i < n_iv_uses (data); i++)
4661 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4663 iv_ca_free (&ivs);
4664 return NULL;
4667 return ivs;
/* Tries to improve set of induction variables IVS.  Returns true iff the
   set was changed (and hence another improvement round may pay off).  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
	return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (best_cost == iv_ca_cost (ivs));
  iv_ca_delta_free (&best_delta);
  return true;
}
/* Attempts to find the optimal set of induction variables.  We do simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  Returns
   NULL if no assignment expressing all uses could be found.  */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set;
  struct iv_use *use;

  /* Get the initial solution.  */
  set = get_initial_solution (data);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  /* Iterate greedy improvement until a fixed point.  */
  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));

  /* Record the chosen candidate on each use for the rewriting phase.  */
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
/* Creates a new induction variable corresponding to CAND.  For IP_ORIGINAL
   candidates no new variable is created; the existing biv is just marked
   as preserved.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  block_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  /* Pseudocandidates do not correspond to a real variable.  */
  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      /* Increment goes just before the loop exit test.  */
      incr_pos = bsi_last (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      /* Increment goes at the end of the latch block.  */
      incr_pos = bsi_last (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;

      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}
4818 /* Creates new induction variables described in SET. */
4820 static void
4821 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4823 unsigned i;
4824 struct iv_cand *cand;
4825 bitmap_iterator bi;
4827 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4829 cand = iv_cand (data, i);
4830 create_new_iv (data, cand);
4834 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4835 is true, remove also the ssa name defined by the statement. */
4837 static void
4838 remove_statement (tree stmt, bool including_defined_name)
4840 if (TREE_CODE (stmt) == PHI_NODE)
4842 remove_phi_node (stmt, NULL_TREE, including_defined_name);
4844 else
4846 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4848 bsi_remove (&bsi, true);
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, stmts, tgt, ass;
  block_stmt_iterator bsi, pbsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR;

      gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
      gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      /* A negated step becomes a subtraction of the positive step.  */
      if (TREE_CODE (step) == NEGATE_EXPR)
	{
	  incr_code = MINUS_EXPR;
	  step = TREE_OPERAND (step, 0);
	}

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to ICE.  */
      op = GIMPLE_STMT_OPERAND (use->stmt, 1);
      if (TREE_CODE (op) == PLUS_EXPR
	  || TREE_CODE (op) == MINUS_EXPR)
	{
	  /* Extract the operand that is not the biv itself.  */
	  if (TREE_OPERAND (op, 0) == cand->var_before)
	    op = TREE_OPERAND (op, 1);
	  else if (TREE_CODE (op) == PLUS_EXPR
		   && TREE_OPERAND (op, 1) == cand->var_before)
	    op = TREE_OPERAND (op, 0);
	  else
	    op = NULL_TREE;
	}
      else
	op = NULL_TREE;

      /* Safe to keep if the other operand is a constant or the step
	 itself.  */
      if (op
	  && (TREE_CODE (op) == INTEGER_CST
	      || operand_equal_p (op, step, 0)))
	return;

      /* Otherwise, add the necessary computations to express
	 the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
			   build2 (incr_code, ctype, op,
				   unshare_expr (step)));
    }
  else
    {
      comp = get_computation (data->current_loop, use, cand);
      gcc_assert (comp != NULL_TREE);
    }

  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      /* Find the last leading label of the block, so the replacement
	 assignment is inserted after the labels.  */
      pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
      while (!bsi_end_p (pbsi)
	     && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
	{
	  bsi = pbsi;
	  bsi_next (&pbsi);
	}
      break;

    case GIMPLE_MODIFY_STMT:
      tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
      bsi = bsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));

  if (TREE_CODE (use->stmt) == PHI_NODE)
    {
      /* Replace the phi node by an explicit assignment at the block
	 start.  */
      if (stmts)
	bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
      ass = build2_gimple (GIMPLE_MODIFY_STMT, tgt, op);
      bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
      remove_statement (use->stmt, false);
      SSA_NAME_DEF_STMT (tgt) = ass;
    }
  else
    {
      /* Replace the right-hand side of the existing assignment.  */
      if (stmts)
	bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
      GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
    }
}
4969 /* Replaces ssa name in index IDX by its basic variable. Callback for
4970 for_each_index. */
4972 static bool
4973 idx_remove_ssa_names (tree base, tree *idx,
4974 void *data ATTRIBUTE_UNUSED)
4976 tree *op;
4978 if (TREE_CODE (*idx) == SSA_NAME)
4979 *idx = SSA_NAME_VAR (*idx);
4981 if (TREE_CODE (base) == ARRAY_REF)
4983 op = &TREE_OPERAND (base, 2);
4984 if (*op
4985 && TREE_CODE (*op) == SSA_NAME)
4986 *op = SSA_NAME_VAR (*op);
4987 op = &TREE_OPERAND (base, 3);
4988 if (*op
4989 && TREE_CODE (*op) == SSA_NAME)
4990 *op = SSA_NAME_VAR (*op);
4993 return true;
4996 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4998 static tree
4999 unshare_and_remove_ssa_names (tree ref)
5001 ref = unshare_expr (ref);
5002 for_each_index (&ref, idx_remove_ssa_names, NULL);
5004 return ref;
/* Extract the alias analysis info for the memory reference REF.  There are
   several ways how this information may be stored and what precisely is
   its semantics depending on the type of the reference, but there always is
   somewhere hidden one _DECL node that is used to determine the set of
   virtual operands for the reference.  The code below deciphers this jungle
   and extracts this single useful piece of information.  ORIG is the
   original (non-unshared) form of the reference; returns the tag, or
   NULL_TREE if none could be determined.  */

static tree
get_ref_tag (tree ref, tree orig)
{
  tree var = get_base_address (ref);
  tree aref = NULL_TREE, tag, sv;
  HOST_WIDE_INT offset, size, maxsize;

  /* Walk down the handled components of ORIG looking at its base/extent.
     NOTE(review): `ref' is a parameter and loop-invariant, so this break
     fires on the very first iteration whenever REF is non-NULL — it looks
     like `aref' may have been intended here; confirm against upstream
     history before changing.  */
  for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
    {
      aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
      if (ref)
	break;
    }

  /* If the base variable has structure sub-variables, return the component
     reference itself so the virtual operands come from the subvars.  */
  if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
    return unshare_expr (sv);

  if (!var)
    return NULL_TREE;

  if (TREE_CODE (var) == INDIRECT_REF)
    {
      /* If the base is a dereference of a pointer, first check its name memory
	 tag.  If it does not have one, use its symbol memory tag.  */
      var = TREE_OPERAND (var, 0);
      if (TREE_CODE (var) != SSA_NAME)
	return NULL_TREE;

      if (SSA_NAME_PTR_INFO (var))
	{
	  tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
	  if (tag)
	    return tag;
	}

      var = SSA_NAME_VAR (var);
      tag = symbol_mem_tag (var);
      /* A dereferenced pointer must have a symbol memory tag.  */
      gcc_assert (tag != NULL_TREE);
      return tag;
    }
  else
    {
      /* Otherwise the base must be a declaration; prefer its symbol memory
	 tag, falling back to the declaration itself.  */
      if (!DECL_P (var))
	return NULL_TREE;

      tag = symbol_mem_tag (var);
      if (tag)
	return tag;

      return var;
    }
}
5067 /* Copies the reference information from OLD_REF to NEW_REF. */
5069 static void
5070 copy_ref_info (tree new_ref, tree old_ref)
5072 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5073 copy_mem_ref_info (new_ref, old_ref);
5074 else
5076 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5077 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5081 /* Rewrites USE (address that is an iv) using candidate CAND. */
5083 static void
5084 rewrite_use_address (struct ivopts_data *data,
5085 struct iv_use *use, struct iv_cand *cand)
5087 aff_tree aff;
5088 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5089 tree ref;
5090 bool ok;
5092 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5093 gcc_assert (ok);
5094 unshare_aff_combination (&aff);
5096 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5097 copy_ref_info (ref, *use->op_p);
5098 *use->op_p = ref;
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  If the candidate admits eliminating the iv from the
   comparison (the selected cost pair recorded a bound), the condition is
   replaced by a comparison against that bound; otherwise the original
   induction value is simply re-expressed in terms of CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree *op_p, cond, op, stmts, bound;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);

  /* A non-NULL value means iv elimination was deemed possible for this
     use/candidate pair; BOUND is the value to compare against.  */
  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);

      compare = iv_elimination_compare (data, use);
      bound = fold_convert (var_type, bound);
      /* Gimplify the bound; any statements it needs go before the use.  */
      op = force_gimple_operand (unshare_expr (bound), &stmts,
				 true, NULL_TREE);

      if (stmts)
	bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);

      *use->op_p = build2 (compare, boolean_type_node, var, op);
      update_stmt (use->stmt);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  /* Pick the operand of the condition that is the (non-constant-step) iv;
     if operand 0 is not, it must be operand 1.  */
  cond = *use->op_p;
  op_p = &TREE_OPERAND (cond, 0);
  if (TREE_CODE (*op_p) != SSA_NAME
      || integer_zerop (get_iv (data, *op_p)->step))
    op_p = &TREE_OPERAND (cond, 1);

  op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
  if (stmts)
    bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);

  *op_p = op;
}
5151 /* Rewrites USE using candidate CAND. */
5153 static void
5154 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5156 push_stmt_changes (&use->stmt);
5158 switch (use->type)
5160 case USE_NONLINEAR_EXPR:
5161 rewrite_use_nonlinear_expr (data, use, cand);
5162 break;
5164 case USE_ADDRESS:
5165 rewrite_use_address (data, use, cand);
5166 break;
5168 case USE_COMPARE:
5169 rewrite_use_compare (data, use, cand);
5170 break;
5172 default:
5173 gcc_unreachable ();
5176 pop_stmt_changes (&use->stmt);
5179 /* Rewrite the uses using the selected induction variables. */
5181 static void
5182 rewrite_uses (struct ivopts_data *data)
5184 unsigned i;
5185 struct iv_cand *cand;
5186 struct iv_use *use;
5188 for (i = 0; i < n_iv_uses (data); i++)
5190 use = iv_use (data, i);
5191 cand = use->selected;
5192 gcc_assert (cand);
5194 rewrite_use (data, use, cand);
5198 /* Removes the ivs that are not used after rewriting. */
5200 static void
5201 remove_unused_ivs (struct ivopts_data *data)
5203 unsigned j;
5204 bitmap_iterator bi;
5206 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5208 struct version_info *info;
5210 info = ver_info (data, j);
5211 if (info->iv
5212 && !integer_zerop (info->iv->step)
5213 && !info->inv_id
5214 && !info->iv->have_use_for
5215 && !info->preserve_biv)
5216 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
/* Frees data allocated by the optimization of a single loop.  Resets DATA
   so the next loop can be processed with the same ivopts_data instance.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  /* Drop the cached number-of-iterations entries.  */
  htab_empty (data->niters);

  /* Reset the per-ssa-version records for all names marked relevant.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
	free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  /* Release all recorded uses, including their per-candidate cost maps.  */
  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  /* Release all recorded candidates.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
	free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  /* Grow the version_info array if new ssa names appeared; doubling keeps
     reallocations amortized.  XCNEWVEC zero-initializes the new table.  */
  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  /* Forget the DECL_RTLs we temporarily set while estimating costs.  */
  for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);
}
/* Finalizes data structures used by the iv optimization pass.  Frees the
   per-loop state first, then the pass-lifetime allocations in DATA.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  /* Per-loop state must go first; it references the containers below.  */
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);
  htab_delete (data->niters);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
}
/* Optimizes the LOOP.  Returns true if anything changed.  Follows the
   numbered plan from the file header comment: find ivs and interesting
   uses, generate candidates, cost them, pick an optimal set, then rewrite
   the uses in terms of the chosen ivs.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;

  data->current_loop = loop;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  Give up on pathologically large
     loops rather than blow up compile time.  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_use_iv_costs (data);
  determine_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  /* Always release the per-loop state, whether or not we changed anything.  */
  free_loop_data (data);

  return changed;
}
5376 /* Main entry point. Optimizes induction variables in loops. */
5378 void
5379 tree_ssa_iv_optimize (void)
5381 struct loop *loop;
5382 struct ivopts_data data;
5383 loop_iterator li;
5385 tree_ssa_iv_optimize_init (&data);
5387 /* Optimize the loops starting with the innermost ones. */
5388 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5390 if (dump_file && (dump_flags & TDF_DETAILS))
5391 flow_loop_dump (loop, dump_file, NULL, 1);
5393 tree_ssa_iv_optimize_loop (&data, loop);
5396 tree_ssa_iv_optimize_finalize (&data);