Merged r158704 through r158906 into branch.
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
bloba64950ec85c838da4a794c9db5bfc376f8a4785c
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
10 later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "pointer-set.h"
87 #include "hashtab.h"
88 #include "tree-chrec.h"
89 #include "tree-scalar-evolution.h"
90 #include "cfgloop.h"
91 #include "params.h"
92 #include "langhooks.h"
93 #include "tree-affine.h"
94 #include "target.h"
/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  NOTE: the LOOP argument is currently unused.  */
#define AVG_LOOP_NITER(LOOP) 5

/* Representation of the induction variable.  Loop invariants are also
   represented as ivs, with a zero (but never NULL) step.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to that the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in that it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs wrto the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.
     Created lazily by niter_for_exit.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names, indexed by SSA version.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below that all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for that the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
339 /* Number of uses recorded in DATA. */
341 static inline unsigned
342 n_iv_uses (struct ivopts_data *data)
344 return VEC_length (iv_use_p, data->iv_uses);
347 /* Ith use recorded in DATA. */
349 static inline struct iv_use *
350 iv_use (struct ivopts_data *data, unsigned i)
352 return VEC_index (iv_use_p, data->iv_uses, i);
355 /* Number of candidates recorded in DATA. */
357 static inline unsigned
358 n_iv_cands (struct ivopts_data *data)
360 return VEC_length (iv_cand_p, data->iv_candidates);
363 /* Ith candidate recorded in DATA. */
365 static inline struct iv_cand *
366 iv_cand (struct ivopts_data *data, unsigned i)
368 return VEC_index (iv_cand_p, data->iv_candidates, i);
371 /* The single loop exit if it dominates the latch, NULL otherwise. */
373 edge
374 single_dom_exit (struct loop *loop)
376 edge exit = single_exit (loop);
378 if (!exit)
379 return NULL;
381 if (!just_once_each_iteration_p (loop, exit->src))
382 return NULL;
384 return exit;
387 /* Dumps information about the induction variable IV to FILE. */
389 extern void dump_iv (FILE *, struct iv *);
390 void
391 dump_iv (FILE *file, struct iv *iv)
393 if (iv->ssa_name)
395 fprintf (file, "ssa name ");
396 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
397 fprintf (file, "\n");
400 fprintf (file, " type ");
401 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
402 fprintf (file, "\n");
404 if (iv->step)
406 fprintf (file, " base ");
407 print_generic_expr (file, iv->base, TDF_SLIM);
408 fprintf (file, "\n");
410 fprintf (file, " step ");
411 print_generic_expr (file, iv->step, TDF_SLIM);
412 fprintf (file, "\n");
414 else
416 fprintf (file, " invariant ");
417 print_generic_expr (file, iv->base, TDF_SLIM);
418 fprintf (file, "\n");
421 if (iv->base_object)
423 fprintf (file, " base object ");
424 print_generic_expr (file, iv->base_object, TDF_SLIM);
425 fprintf (file, "\n");
428 if (iv->biv_p)
429 fprintf (file, " is a biv\n");
432 /* Dumps information about the USE to FILE. */
434 extern void dump_use (FILE *, struct iv_use *);
435 void
436 dump_use (FILE *file, struct iv_use *use)
438 fprintf (file, "use %d\n", use->id);
440 switch (use->type)
442 case USE_NONLINEAR_EXPR:
443 fprintf (file, " generic\n");
444 break;
446 case USE_ADDRESS:
447 fprintf (file, " address\n");
448 break;
450 case USE_COMPARE:
451 fprintf (file, " compare\n");
452 break;
454 default:
455 gcc_unreachable ();
458 fprintf (file, " in statement ");
459 print_gimple_stmt (file, use->stmt, 0, 0);
460 fprintf (file, "\n");
462 fprintf (file, " at position ");
463 if (use->op_p)
464 print_generic_expr (file, *use->op_p, TDF_SLIM);
465 fprintf (file, "\n");
467 dump_iv (file, use->iv);
469 if (use->related_cands)
471 fprintf (file, " related candidates ");
472 dump_bitmap (file, use->related_cands);
476 /* Dumps information about the uses to FILE. */
478 extern void dump_uses (FILE *, struct ivopts_data *);
479 void
480 dump_uses (FILE *file, struct ivopts_data *data)
482 unsigned i;
483 struct iv_use *use;
485 for (i = 0; i < n_iv_uses (data); i++)
487 use = iv_use (data, i);
489 dump_use (file, use);
490 fprintf (file, "\n");
494 /* Dumps information about induction variable candidate CAND to FILE. */
496 extern void dump_cand (FILE *, struct iv_cand *);
497 void
498 dump_cand (FILE *file, struct iv_cand *cand)
500 struct iv *iv = cand->iv;
502 fprintf (file, "candidate %d%s\n",
503 cand->id, cand->important ? " (important)" : "");
505 if (cand->depends_on)
507 fprintf (file, " depends on ");
508 dump_bitmap (file, cand->depends_on);
511 if (!iv)
513 fprintf (file, " final value replacement\n");
514 return;
517 switch (cand->pos)
519 case IP_NORMAL:
520 fprintf (file, " incremented before exit test\n");
521 break;
523 case IP_BEFORE_USE:
524 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
525 break;
527 case IP_AFTER_USE:
528 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
529 break;
531 case IP_END:
532 fprintf (file, " incremented at end\n");
533 break;
535 case IP_ORIGINAL:
536 fprintf (file, " original biv\n");
537 break;
540 dump_iv (file, iv);
543 /* Returns the info for ssa version VER. */
545 static inline struct version_info *
546 ver_info (struct ivopts_data *data, unsigned ver)
548 return data->version_info + ver;
551 /* Returns the info for ssa name NAME. */
553 static inline struct version_info *
554 name_info (struct ivopts_data *data, tree name)
556 return ver_info (data, SSA_NAME_VERSION (name));
559 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
560 emitted in LOOP. */
562 static bool
563 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
565 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
567 gcc_assert (bb);
569 if (sbb == loop->latch)
570 return true;
572 if (sbb != bb)
573 return false;
575 return stmt == last_stmt (bb);
578 /* Returns true if STMT if after the place where the original induction
579 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
580 if the positions are identical. */
582 static bool
583 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
585 basic_block cand_bb = gimple_bb (cand->incremented_at);
586 basic_block stmt_bb = gimple_bb (stmt);
588 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
589 return false;
591 if (stmt_bb != cand_bb)
592 return true;
594 if (true_if_equal
595 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
596 return true;
597 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
/* Returns true if STMT if after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      /* The increment is at the end of the latch; nothing follows it.  */
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      /* For IP_BEFORE_USE the statement at the increment position itself
	 counts as being after the increment.  */
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
626 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
628 static bool
629 abnormal_ssa_name_p (tree exp)
631 if (!exp)
632 return false;
634 if (TREE_CODE (exp) != SSA_NAME)
635 return false;
637 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
640 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
641 abnormal phi node. Callback for for_each_index. */
643 static bool
644 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
645 void *data ATTRIBUTE_UNUSED)
647 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
649 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
650 return false;
651 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
652 return false;
655 return !abnormal_ssa_name_p (*index);
658 /* Returns true if EXPR contains a ssa name that occurs in an
659 abnormal phi node. */
661 bool
662 contains_abnormal_ssa_name_p (tree expr)
664 enum tree_code code;
665 enum tree_code_class codeclass;
667 if (!expr)
668 return false;
670 code = TREE_CODE (expr);
671 codeclass = TREE_CODE_CLASS (code);
673 if (code == SSA_NAME)
674 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
676 if (code == INTEGER_CST
677 || is_gimple_min_invariant (expr))
678 return false;
680 if (code == ADDR_EXPR)
681 return !for_each_index (&TREE_OPERAND (expr, 0),
682 idx_contains_abnormal_ssa_name_p,
683 NULL);
685 switch (codeclass)
687 case tcc_binary:
688 case tcc_comparison:
689 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
690 return true;
692 /* Fallthru. */
693 case tcc_unary:
694 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
695 return true;
697 break;
699 default:
700 gcc_unreachable ();
703 return false;
706 /* Returns tree describing number of iterations determined from
707 EXIT of DATA->current_loop, or NULL if something goes wrong. */
709 static tree
710 niter_for_exit (struct ivopts_data *data, edge exit)
712 struct tree_niter_desc desc;
713 tree niter;
714 void **slot;
716 if (!data->niters)
718 data->niters = pointer_map_create ();
719 slot = NULL;
721 else
722 slot = pointer_map_contains (data->niters, exit);
724 if (!slot)
726 /* Try to determine number of iterations. We must know it
727 unconditionally (i.e., without possibility of # of iterations
728 being zero). Also, we cannot safely work with ssa names that
729 appear in phi nodes on abnormal edges, so that we do not create
730 overlapping life ranges for them (PR 27283). */
731 if (number_of_iterations_exit (data->current_loop,
732 exit, &desc, true)
733 && integer_zerop (desc.may_be_zero)
734 && !contains_abnormal_ssa_name_p (desc.niter))
735 niter = desc.niter;
736 else
737 niter = NULL_TREE;
739 *pointer_map_insert (data->niters, exit) = niter;
741 else
742 niter = (tree) *slot;
744 return niter;
747 /* Returns tree describing number of iterations determined from
748 single dominating exit of DATA->current_loop, or NULL if something
749 goes wrong. */
751 static tree
752 niter_for_single_dom_exit (struct ivopts_data *data)
754 edge exit = single_dom_exit (data->current_loop);
756 if (!exit)
757 return NULL;
759 return niter_for_exit (data, exit);
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  /* Twice the current number of ssa names: leaves headroom for names
     created while the pass runs.  */
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  /* The niter map is created lazily in niter_for_exit.  */
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
/* Returns a memory object to that EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      /* A constant pointer points to no object.  */
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      /* The offset does not change the pointed-to object.  */
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  /* Invariants are represented with a zero step, never a NULL one.  */
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  /* Each ssa name may be assigned an iv description only once.  */
  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      /* Names defined outside the current loop are loop invariants;
	 record them as ivs with a zero step.  */
      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}
886 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
887 not define a simple affine biv with nonzero step. */
889 static tree
890 determine_biv_step (gimple phi)
892 struct loop *loop = gimple_bb (phi)->loop_father;
893 tree name = PHI_RESULT (phi);
894 affine_iv iv;
896 if (!is_gimple_reg (name))
897 return NULL_TREE;
899 if (!simple_iv (loop, loop, name, &iv, true))
900 return NULL_TREE;
902 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
905 /* Finds basic ivs. */
907 static bool
908 find_bivs (struct ivopts_data *data)
910 gimple phi;
911 tree step, type, base;
912 bool found = false;
913 struct loop *loop = data->current_loop;
914 gimple_stmt_iterator psi;
916 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
918 phi = gsi_stmt (psi);
920 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
921 continue;
923 step = determine_biv_step (phi);
924 if (!step)
925 continue;
927 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
928 base = expand_simple_operations (base);
929 if (contains_abnormal_ssa_name_p (base)
930 || contains_abnormal_ssa_name_p (step))
931 continue;
933 type = TREE_TYPE (PHI_RESULT (phi));
934 base = fold_convert (type, base);
935 if (step)
937 if (POINTER_TYPE_P (type))
938 step = fold_convert (sizetype, step);
939 else
940 step = fold_convert (type, step);
943 set_iv (data, PHI_RESULT (phi), base, step);
944 found = true;
947 return found;
/* Marks basic ivs: sets the biv_p flag on the phi result iv and on the iv
   of the name incremented in the latch, provided the increment happens in
   the current loop.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      /* The value flowing in from the latch is the incremented iv.  */
      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  Returns true on success.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  /* Only assignments to an ssa name can define a giv.  */
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  /* Avoid ssa names from abnormal phis (see PR 27283).  */
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}
1016 /* Finds general ivs in statement STMT. */
1018 static void
1019 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1021 affine_iv iv;
1023 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1024 return;
1026 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1029 /* Finds general ivs in basic block BB. */
1031 static void
1032 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1034 gimple_stmt_iterator bsi;
1036 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1037 find_givs_in_stmt (data, gsi_stmt (bsi));
1040 /* Finds general ivs. */
1042 static void
1043 find_givs (struct ivopts_data *data)
1045 struct loop *loop = data->current_loop;
1046 basic_block *body = get_loop_body_in_dom_order (loop);
1047 unsigned i;
1049 for (i = 0; i < loop->num_nodes; i++)
1050 find_givs_in_bb (data, body[i]);
1051 free (body);
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  Returns false if the
   loop has no bivs at all (then there is nothing to optimize).  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter, TDF_SLIM);
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   Returns the newly allocated use.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  /* Names defined inside the loop are not invariants.  */
  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  /* Assign an invariant id on first sight.  */
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.
   Returns the recorded use, or NULL if OP is not an interesting iv use.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  /* Reuse a previously recorded nonlinear use for this name.  */
  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  /* A zero step means OP is a loop invariant, not an iv.  */
  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  /* Record a private copy of the iv; record_use clears its ssa_name.  */
  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
1191 /* Given a condition in statement STMT, checks whether it is a compare
1192 of an induction variable and an invariant. If this is the case,
1193 CONTROL_VAR is set to location of the iv, BOUND to the location of
1194 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1195 induction variable descriptions, and true is returned. If this is not
1196 the case, CONTROL_VAR and BOUND are set to the arguments of the
1197 condition and false is returned. */
1199 static bool
1200 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1201 tree **control_var, tree **bound,
1202 struct iv **iv_var, struct iv **iv_bound)
1204 /* The objects returned when COND has constant operands. */
1205 static struct iv const_iv;
1206 static tree zero;
1207 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1208 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1209 bool ret = false;
1211 if (gimple_code (stmt) == GIMPLE_COND)
1213 op0 = gimple_cond_lhs_ptr (stmt);
1214 op1 = gimple_cond_rhs_ptr (stmt);
1216 else
1218 op0 = gimple_assign_rhs1_ptr (stmt);
1219 op1 = gimple_assign_rhs2_ptr (stmt);
1222 zero = integer_zero_node;
1223 const_iv.step = integer_zero_node;
1225 if (TREE_CODE (*op0) == SSA_NAME)
1226 iv0 = get_iv (data, *op0);
1227 if (TREE_CODE (*op1) == SSA_NAME)
1228 iv1 = get_iv (data, *op1);
1230 /* Exactly one of the compared values must be an iv, and the other one must
1231 be an invariant. */
1232 if (!iv0 || !iv1)
1233 goto end;
1235 if (integer_zerop (iv0->step))
1237 /* Control variable may be on the other side. */
1238 tmp_op = op0; op0 = op1; op1 = tmp_op;
1239 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1241 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1243 end:
1244 if (control_var)
1245 *control_var = op0;;
1246 if (iv_var)
1247 *iv_var = iv0;;
1248 if (bound)
1249 *bound = op1;
1250 if (iv_bound)
1251 *iv_bound = iv1;
1253 return ret;
1256 /* Checks whether the condition in STMT is interesting and if so,
1257 records it. */
1259 static void
1260 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1262 tree *var_p, *bound_p;
1263 struct iv *var_iv, *civ;
1265 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1267 find_interesting_uses_op (data, *var_p);
1268 find_interesting_uses_op (data, *bound_p);
1269 return;
1272 civ = XNEW (struct iv);
1273 *civ = *var_iv;
1274 record_use (data, NULL, civ, stmt, USE_COMPARE);
1277 /* Returns true if expression EXPR is obviously invariant in LOOP,
1278 i.e. if all its operands are defined outside of the LOOP. LOOP
1279 should not be the function body. */
1281 bool
1282 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1284 basic_block def_bb;
1285 unsigned i, len;
1287 gcc_assert (loop_depth (loop) > 0);
1289 if (is_gimple_min_invariant (expr))
1290 return true;
1292 if (TREE_CODE (expr) == SSA_NAME)
1294 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1295 if (def_bb
1296 && flow_bb_inside_loop_p (loop, def_bb))
1297 return false;
1299 return true;
1302 if (!EXPR_P (expr))
1303 return false;
1305 len = TREE_OPERAND_LENGTH (expr);
1306 for (i = 0; i < len; i++)
1307 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1308 return false;
1310 return true;
1313 /* Returns true if statement STMT is obviously invariant in LOOP,
1314 i.e. if all its operands on the RHS are defined outside of the LOOP.
1315 LOOP should not be the function body. */
1317 bool
1318 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1320 unsigned i;
1321 tree lhs;
1323 gcc_assert (loop_depth (loop) > 0);
1325 lhs = gimple_get_lhs (stmt);
1326 for (i = 0; i < gimple_num_ops (stmt); i++)
1328 tree op = gimple_op (stmt, i);
1329 if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1330 return false;
1333 return true;
1336 /* Cumulates the steps of indices into DATA and replaces their values with the
1337 initial ones. Returns false when the value of the index cannot be determined.
1338 Callback for for_each_index. */
1340 struct ifs_ivopts_data
1342 struct ivopts_data *ivopts_data;
1343 gimple stmt;
1344 tree step;
1347 static bool
1348 idx_find_step (tree base, tree *idx, void *data)
1350 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1351 struct iv *iv;
1352 tree step, iv_base, iv_step, lbound, off;
1353 struct loop *loop = dta->ivopts_data->current_loop;
1355 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1356 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1357 return false;
1359 /* If base is a component ref, require that the offset of the reference
1360 be invariant. */
1361 if (TREE_CODE (base) == COMPONENT_REF)
1363 off = component_ref_field_offset (base);
1364 return expr_invariant_in_loop_p (loop, off);
1367 /* If base is array, first check whether we will be able to move the
1368 reference out of the loop (in order to take its address in strength
1369 reduction). In order for this to work we need both lower bound
1370 and step to be loop invariants. */
1371 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1373 /* Moreover, for a range, the size needs to be invariant as well. */
1374 if (TREE_CODE (base) == ARRAY_RANGE_REF
1375 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1376 return false;
1378 step = array_ref_element_size (base);
1379 lbound = array_ref_low_bound (base);
1381 if (!expr_invariant_in_loop_p (loop, step)
1382 || !expr_invariant_in_loop_p (loop, lbound))
1383 return false;
1386 if (TREE_CODE (*idx) != SSA_NAME)
1387 return true;
1389 iv = get_iv (dta->ivopts_data, *idx);
1390 if (!iv)
1391 return false;
1393 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1394 *&x[0], which is not folded and does not trigger the
1395 ARRAY_REF path below. */
1396 *idx = iv->base;
1398 if (integer_zerop (iv->step))
1399 return true;
1401 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1403 step = array_ref_element_size (base);
1405 /* We only handle addresses whose step is an integer constant. */
1406 if (TREE_CODE (step) != INTEGER_CST)
1407 return false;
1409 else
1410 /* The step for pointer arithmetics already is 1 byte. */
1411 step = build_int_cst (sizetype, 1);
1413 iv_base = iv->base;
1414 iv_step = iv->step;
1415 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1416 sizetype, &iv_base, &iv_step, dta->stmt,
1417 false))
1419 /* The index might wrap. */
1420 return false;
1423 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1424 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1426 return true;
1429 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1430 object is passed to it in DATA. */
1432 static bool
1433 idx_record_use (tree base, tree *idx,
1434 void *vdata)
1436 struct ivopts_data *data = (struct ivopts_data *) vdata;
1437 find_interesting_uses_op (data, *idx);
1438 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1440 find_interesting_uses_op (data, array_ref_element_size (base));
1441 find_interesting_uses_op (data, array_ref_low_bound (base));
1443 return true;
1446 /* If we can prove that TOP = cst * BOT for some constant cst,
1447 store cst to MUL and return true. Otherwise return false.
1448 The returned value is always sign-extended, regardless of the
1449 signedness of TOP and BOT. */
1451 static bool
1452 constant_multiple_of (tree top, tree bot, double_int *mul)
1454 tree mby;
1455 enum tree_code code;
1456 double_int res, p0, p1;
1457 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1459 STRIP_NOPS (top);
1460 STRIP_NOPS (bot);
1462 if (operand_equal_p (top, bot, 0))
1464 *mul = double_int_one;
1465 return true;
1468 code = TREE_CODE (top);
1469 switch (code)
1471 case MULT_EXPR:
1472 mby = TREE_OPERAND (top, 1);
1473 if (TREE_CODE (mby) != INTEGER_CST)
1474 return false;
1476 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1477 return false;
1479 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1480 precision);
1481 return true;
1483 case PLUS_EXPR:
1484 case MINUS_EXPR:
1485 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1486 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1487 return false;
1489 if (code == MINUS_EXPR)
1490 p1 = double_int_neg (p1);
1491 *mul = double_int_sext (double_int_add (p0, p1), precision);
1492 return true;
1494 case INTEGER_CST:
1495 if (TREE_CODE (bot) != INTEGER_CST)
1496 return false;
1498 p0 = double_int_sext (tree_to_double_int (top), precision);
1499 p1 = double_int_sext (tree_to_double_int (bot), precision);
1500 if (double_int_zero_p (p1))
1501 return false;
1502 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1503 precision);
1504 return double_int_zero_p (res);
1506 default:
1507 return false;
1511 /* Returns true if memory reference REF with step STEP may be unaligned. */
1513 static bool
1514 may_be_unaligned_p (tree ref, tree step)
1516 tree base;
1517 tree base_type;
1518 HOST_WIDE_INT bitsize;
1519 HOST_WIDE_INT bitpos;
1520 tree toffset;
1521 enum machine_mode mode;
1522 int unsignedp, volatilep;
1523 unsigned base_align;
1525 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1526 thus they are not misaligned. */
1527 if (TREE_CODE (ref) == TARGET_MEM_REF)
1528 return false;
1530 /* The test below is basically copy of what expr.c:normal_inner_ref
1531 does to check whether the object must be loaded by parts when
1532 STRICT_ALIGNMENT is true. */
1533 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1534 &unsignedp, &volatilep, true);
1535 base_type = TREE_TYPE (base);
1536 base_align = TYPE_ALIGN (base_type);
1538 if (mode != BLKmode)
1540 double_int mul;
1541 tree al = build_int_cst (TREE_TYPE (step),
1542 GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
1544 if (base_align < GET_MODE_ALIGNMENT (mode)
1545 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1546 || bitpos % BITS_PER_UNIT != 0)
1547 return true;
1549 if (!constant_multiple_of (step, al, &mul))
1550 return true;
1553 return false;
1556 /* Return true if EXPR may be non-addressable. */
1558 static bool
1559 may_be_nonaddressable_p (tree expr)
1561 switch (TREE_CODE (expr))
1563 case TARGET_MEM_REF:
1564 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1565 target, thus they are always addressable. */
1566 return false;
1568 case COMPONENT_REF:
1569 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1570 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1572 case VIEW_CONVERT_EXPR:
1573 /* This kind of view-conversions may wrap non-addressable objects
1574 and make them look addressable. After some processing the
1575 non-addressability may be uncovered again, causing ADDR_EXPRs
1576 of inappropriate objects to be built. */
1577 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1578 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1579 return true;
1581 /* ... fall through ... */
1583 case ARRAY_REF:
1584 case ARRAY_RANGE_REF:
1585 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1587 CASE_CONVERT:
1588 return true;
1590 default:
1591 break;
1594 return false;
1597 /* Finds addresses in *OP_P inside STMT. */
1599 static void
1600 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1602 tree base = *op_p, step = build_int_cst (sizetype, 0);
1603 struct iv *civ;
1604 struct ifs_ivopts_data ifs_ivopts_data;
1606 /* Do not play with volatile memory references. A bit too conservative,
1607 perhaps, but safe. */
1608 if (gimple_has_volatile_ops (stmt))
1609 goto fail;
1611 /* Ignore bitfields for now. Not really something terribly complicated
1612 to handle. TODO. */
1613 if (TREE_CODE (base) == BIT_FIELD_REF)
1614 goto fail;
1616 base = unshare_expr (base);
1618 if (TREE_CODE (base) == TARGET_MEM_REF)
1620 tree type = build_pointer_type (TREE_TYPE (base));
1621 tree astep;
1623 if (TMR_BASE (base)
1624 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1626 civ = get_iv (data, TMR_BASE (base));
1627 if (!civ)
1628 goto fail;
1630 TMR_BASE (base) = civ->base;
1631 step = civ->step;
1633 if (TMR_INDEX (base)
1634 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1636 civ = get_iv (data, TMR_INDEX (base));
1637 if (!civ)
1638 goto fail;
1640 TMR_INDEX (base) = civ->base;
1641 astep = civ->step;
1643 if (astep)
1645 if (TMR_STEP (base))
1646 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1648 step = fold_build2 (PLUS_EXPR, type, step, astep);
1652 if (integer_zerop (step))
1653 goto fail;
1654 base = tree_mem_ref_addr (type, base);
1656 else
1658 ifs_ivopts_data.ivopts_data = data;
1659 ifs_ivopts_data.stmt = stmt;
1660 ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1661 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1662 || integer_zerop (ifs_ivopts_data.step))
1663 goto fail;
1664 step = ifs_ivopts_data.step;
1666 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1667 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1669 /* Check that the base expression is addressable. This needs
1670 to be done after substituting bases of IVs into it. */
1671 if (may_be_nonaddressable_p (base))
1672 goto fail;
1674 /* Moreover, on strict alignment platforms, check that it is
1675 sufficiently aligned. */
1676 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1677 goto fail;
1679 base = build_fold_addr_expr (base);
1681 /* Substituting bases of IVs into the base expression might
1682 have caused folding opportunities. */
1683 if (TREE_CODE (base) == ADDR_EXPR)
1685 tree *ref = &TREE_OPERAND (base, 0);
1686 while (handled_component_p (*ref))
1687 ref = &TREE_OPERAND (*ref, 0);
1688 if (TREE_CODE (*ref) == INDIRECT_REF)
1690 tree tem = gimple_fold_indirect_ref (TREE_OPERAND (*ref, 0));
1691 if (tem)
1692 *ref = tem;
1697 civ = alloc_iv (base, step);
1698 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1699 return;
1701 fail:
1702 for_each_index (op_p, idx_record_use, data);
1705 /* Finds and records invariants used in STMT. */
1707 static void
1708 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1710 ssa_op_iter iter;
1711 use_operand_p use_p;
1712 tree op;
1714 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1716 op = USE_FROM_PTR (use_p);
1717 record_invariant (data, op, false);
1721 /* Finds interesting uses of induction variables in the statement STMT. */
1723 static void
1724 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1726 struct iv *iv;
1727 tree op, *lhs, *rhs;
1728 ssa_op_iter iter;
1729 use_operand_p use_p;
1730 enum tree_code code;
1732 find_invariants_stmt (data, stmt);
1734 if (gimple_code (stmt) == GIMPLE_COND)
1736 find_interesting_uses_cond (data, stmt);
1737 return;
1740 if (is_gimple_assign (stmt))
1742 lhs = gimple_assign_lhs_ptr (stmt);
1743 rhs = gimple_assign_rhs1_ptr (stmt);
1745 if (TREE_CODE (*lhs) == SSA_NAME)
1747 /* If the statement defines an induction variable, the uses are not
1748 interesting by themselves. */
1750 iv = get_iv (data, *lhs);
1752 if (iv && !integer_zerop (iv->step))
1753 return;
1756 code = gimple_assign_rhs_code (stmt);
1757 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1758 && (REFERENCE_CLASS_P (*rhs)
1759 || is_gimple_val (*rhs)))
1761 if (REFERENCE_CLASS_P (*rhs))
1762 find_interesting_uses_address (data, stmt, rhs);
1763 else
1764 find_interesting_uses_op (data, *rhs);
1766 if (REFERENCE_CLASS_P (*lhs))
1767 find_interesting_uses_address (data, stmt, lhs);
1768 return;
1770 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1772 find_interesting_uses_cond (data, stmt);
1773 return;
1776 /* TODO -- we should also handle address uses of type
1778 memory = call (whatever);
1782 call (memory). */
1785 if (gimple_code (stmt) == GIMPLE_PHI
1786 && gimple_bb (stmt) == data->current_loop->header)
1788 iv = get_iv (data, PHI_RESULT (stmt));
1790 if (iv && !integer_zerop (iv->step))
1791 return;
1794 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1796 op = USE_FROM_PTR (use_p);
1798 if (TREE_CODE (op) != SSA_NAME)
1799 continue;
1801 iv = get_iv (data, op);
1802 if (!iv)
1803 continue;
1805 find_interesting_uses_op (data, op);
1809 /* Finds interesting uses of induction variables outside of loops
1810 on loop exit edge EXIT. */
1812 static void
1813 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1815 gimple phi;
1816 gimple_stmt_iterator psi;
1817 tree def;
1819 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1821 phi = gsi_stmt (psi);
1822 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1823 if (is_gimple_reg (def))
1824 find_interesting_uses_op (data, def);
1828 /* Finds uses of the induction variables that are interesting. */
1830 static void
1831 find_interesting_uses (struct ivopts_data *data)
1833 basic_block bb;
1834 gimple_stmt_iterator bsi;
1835 basic_block *body = get_loop_body (data->current_loop);
1836 unsigned i;
1837 struct version_info *info;
1838 edge e;
1840 if (dump_file && (dump_flags & TDF_DETAILS))
1841 fprintf (dump_file, "Uses:\n\n");
1843 for (i = 0; i < data->current_loop->num_nodes; i++)
1845 edge_iterator ei;
1846 bb = body[i];
1848 FOR_EACH_EDGE (e, ei, bb->succs)
1849 if (e->dest != EXIT_BLOCK_PTR
1850 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1851 find_interesting_uses_outside (data, e);
1853 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1854 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1855 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1856 if (!is_gimple_debug (gsi_stmt (bsi)))
1857 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1860 if (dump_file && (dump_flags & TDF_DETAILS))
1862 bitmap_iterator bi;
1864 fprintf (dump_file, "\n");
1866 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1868 info = ver_info (data, i);
1869 if (info->inv_id)
1871 fprintf (dump_file, " ");
1872 print_generic_expr (dump_file, info->name, TDF_SLIM);
1873 fprintf (dump_file, " is invariant (%d)%s\n",
1874 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1878 fprintf (dump_file, "\n");
1881 free (body);
1884 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1885 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1886 we are at the top-level of the processed address. */
1888 static tree
1889 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1890 unsigned HOST_WIDE_INT *offset)
1892 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1893 enum tree_code code;
1894 tree type, orig_type = TREE_TYPE (expr);
1895 unsigned HOST_WIDE_INT off0, off1, st;
1896 tree orig_expr = expr;
1898 STRIP_NOPS (expr);
1900 type = TREE_TYPE (expr);
1901 code = TREE_CODE (expr);
1902 *offset = 0;
1904 switch (code)
1906 case INTEGER_CST:
1907 if (!cst_and_fits_in_hwi (expr)
1908 || integer_zerop (expr))
1909 return orig_expr;
1911 *offset = int_cst_value (expr);
1912 return build_int_cst (orig_type, 0);
1914 case POINTER_PLUS_EXPR:
1915 case PLUS_EXPR:
1916 case MINUS_EXPR:
1917 op0 = TREE_OPERAND (expr, 0);
1918 op1 = TREE_OPERAND (expr, 1);
1920 op0 = strip_offset_1 (op0, false, false, &off0);
1921 op1 = strip_offset_1 (op1, false, false, &off1);
1923 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1924 if (op0 == TREE_OPERAND (expr, 0)
1925 && op1 == TREE_OPERAND (expr, 1))
1926 return orig_expr;
1928 if (integer_zerop (op1))
1929 expr = op0;
1930 else if (integer_zerop (op0))
1932 if (code == MINUS_EXPR)
1933 expr = fold_build1 (NEGATE_EXPR, type, op1);
1934 else
1935 expr = op1;
1937 else
1938 expr = fold_build2 (code, type, op0, op1);
1940 return fold_convert (orig_type, expr);
1942 case MULT_EXPR:
1943 op1 = TREE_OPERAND (expr, 1);
1944 if (!cst_and_fits_in_hwi (op1))
1945 return orig_expr;
1947 op0 = TREE_OPERAND (expr, 0);
1948 op0 = strip_offset_1 (op0, false, false, &off0);
1949 if (op0 == TREE_OPERAND (expr, 0))
1950 return orig_expr;
1952 *offset = off0 * int_cst_value (op1);
1953 if (integer_zerop (op0))
1954 expr = op0;
1955 else
1956 expr = fold_build2 (MULT_EXPR, type, op0, op1);
1958 return fold_convert (orig_type, expr);
1960 case ARRAY_REF:
1961 case ARRAY_RANGE_REF:
1962 if (!inside_addr)
1963 return orig_expr;
1965 step = array_ref_element_size (expr);
1966 if (!cst_and_fits_in_hwi (step))
1967 break;
1969 st = int_cst_value (step);
1970 op1 = TREE_OPERAND (expr, 1);
1971 op1 = strip_offset_1 (op1, false, false, &off1);
1972 *offset = off1 * st;
1974 if (top_compref
1975 && integer_zerop (op1))
1977 /* Strip the component reference completely. */
1978 op0 = TREE_OPERAND (expr, 0);
1979 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1980 *offset += off0;
1981 return op0;
1983 break;
1985 case COMPONENT_REF:
1986 if (!inside_addr)
1987 return orig_expr;
1989 tmp = component_ref_field_offset (expr);
1990 if (top_compref
1991 && cst_and_fits_in_hwi (tmp))
1993 /* Strip the component reference completely. */
1994 op0 = TREE_OPERAND (expr, 0);
1995 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1996 *offset = off0 + int_cst_value (tmp);
1997 return op0;
1999 break;
2001 case ADDR_EXPR:
2002 op0 = TREE_OPERAND (expr, 0);
2003 op0 = strip_offset_1 (op0, true, true, &off0);
2004 *offset += off0;
2006 if (op0 == TREE_OPERAND (expr, 0))
2007 return orig_expr;
2009 expr = build_fold_addr_expr (op0);
2010 return fold_convert (orig_type, expr);
2012 case INDIRECT_REF:
2013 inside_addr = false;
2014 break;
2016 default:
2017 return orig_expr;
2020 /* Default handling of expressions for that we want to recurse into
2021 the first operand. */
2022 op0 = TREE_OPERAND (expr, 0);
2023 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2024 *offset += off0;
2026 if (op0 == TREE_OPERAND (expr, 0)
2027 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2028 return orig_expr;
2030 expr = copy_node (expr);
2031 TREE_OPERAND (expr, 0) = op0;
2032 if (op1)
2033 TREE_OPERAND (expr, 1) = op1;
2035 /* Inside address, we might strip the top level component references,
2036 thus changing type of the expression. Handling of ADDR_EXPR
2037 will fix that. */
2038 expr = fold_convert (orig_type, expr);
2040 return expr;
2043 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2045 static tree
2046 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2048 return strip_offset_1 (expr, false, false, offset);
2051 /* Returns variant of TYPE that can be used as base for different uses.
2052 We return unsigned type with the same precision, which avoids problems
2053 with overflows. */
2055 static tree
2056 generic_type_for (tree type)
2058 if (POINTER_TYPE_P (type))
2059 return unsigned_type_for (type);
2061 if (TYPE_UNSIGNED (type))
2062 return type;
2064 return unsigned_type_for (type);
2067 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2068 the bitmap to that we should store it. */
2070 static struct ivopts_data *fd_ivopts_data;
2071 static tree
2072 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2074 bitmap *depends_on = (bitmap *) data;
2075 struct version_info *info;
2077 if (TREE_CODE (*expr_p) != SSA_NAME)
2078 return NULL_TREE;
2079 info = name_info (fd_ivopts_data, *expr_p);
2081 if (!info->inv_id || info->has_nonlin_use)
2082 return NULL_TREE;
2084 if (!*depends_on)
2085 *depends_on = BITMAP_ALLOC (NULL);
2086 bitmap_set_bit (*depends_on, info->inv_id);
2088 return NULL_TREE;
2091 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2092 position to POS. If USE is not NULL, the candidate is set as related to
2093 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2094 replacement of the final value of the iv by a direct computation. */
2096 static struct iv_cand *
2097 add_candidate_1 (struct ivopts_data *data,
2098 tree base, tree step, bool important, enum iv_position pos,
2099 struct iv_use *use, gimple incremented_at)
2101 unsigned i;
2102 struct iv_cand *cand = NULL;
2103 tree type, orig_type;
2105 if (base)
2107 orig_type = TREE_TYPE (base);
2108 type = generic_type_for (orig_type);
2109 if (type != orig_type)
2111 base = fold_convert (type, base);
2112 step = fold_convert (type, step);
2116 for (i = 0; i < n_iv_cands (data); i++)
2118 cand = iv_cand (data, i);
2120 if (cand->pos != pos)
2121 continue;
2123 if (cand->incremented_at != incremented_at
2124 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2125 && cand->ainc_use != use))
2126 continue;
2128 if (!cand->iv)
2130 if (!base && !step)
2131 break;
2133 continue;
2136 if (!base && !step)
2137 continue;
2139 if (operand_equal_p (base, cand->iv->base, 0)
2140 && operand_equal_p (step, cand->iv->step, 0))
2141 break;
2144 if (i == n_iv_cands (data))
2146 cand = XCNEW (struct iv_cand);
2147 cand->id = i;
2149 if (!base && !step)
2150 cand->iv = NULL;
2151 else
2152 cand->iv = alloc_iv (base, step);
2154 cand->pos = pos;
2155 if (pos != IP_ORIGINAL && cand->iv)
2157 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2158 cand->var_after = cand->var_before;
2160 cand->important = important;
2161 cand->incremented_at = incremented_at;
2162 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2164 if (step
2165 && TREE_CODE (step) != INTEGER_CST)
2167 fd_ivopts_data = data;
2168 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2171 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2172 cand->ainc_use = use;
2173 else
2174 cand->ainc_use = NULL;
2176 if (dump_file && (dump_flags & TDF_DETAILS))
2177 dump_cand (dump_file, cand);
2180 if (important && !cand->important)
2182 cand->important = true;
2183 if (dump_file && (dump_flags & TDF_DETAILS))
2184 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2187 if (use)
2189 bitmap_set_bit (use->related_cands, i);
2190 if (dump_file && (dump_flags & TDF_DETAILS))
2191 fprintf (dump_file, "Candidate %d is related to use %d\n",
2192 cand->id, use->id);
2195 return cand;
2198 /* Returns true if incrementing the induction variable at the end of the LOOP
2199 is allowed.
2201 The purpose is to avoid splitting latch edge with a biv increment, thus
2202 creating a jump, possibly confusing other optimization passes and leaving
2203 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2204 is not available (so we do not have a better alternative), or if the latch
2205 edge is already nonempty. */
2207 static bool
2208 allow_ip_end_pos_p (struct loop *loop)
2210 if (!ip_normal_pos (loop))
2211 return true;
2213 if (!empty_block_p (ip_end_pos (loop)))
2214 return true;
2216 return false;
2219 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2220 Important field is set to IMPORTANT. */
2222 static void
2223 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2224 bool important, struct iv_use *use)
2226 basic_block use_bb = gimple_bb (use->stmt);
2227 enum machine_mode mem_mode;
2228 unsigned HOST_WIDE_INT cstepi;
2230 /* If we insert the increment in any position other than the standard
2231 ones, we must ensure that it is incremented once per iteration.
2232 It must not be in an inner nested loop, or one side of an if
2233 statement. */
2234 if (use_bb->loop_father != data->current_loop
2235 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2236 || stmt_could_throw_p (use->stmt)
2237 || !cst_and_fits_in_hwi (step))
2238 return;
2240 cstepi = int_cst_value (step);
2242 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2243 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2244 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2246 enum tree_code code = MINUS_EXPR;
2247 tree new_base;
2248 tree new_step = step;
2250 if (POINTER_TYPE_P (TREE_TYPE (base)))
2252 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2253 code = POINTER_PLUS_EXPR;
2255 else
2256 new_step = fold_convert (TREE_TYPE (base), new_step);
2257 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2258 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2259 use->stmt);
2261 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2262 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2264 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2265 use->stmt);
2269 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2270 position to POS. If USE is not NULL, the candidate is set as related to
2271 it. The candidate computation is scheduled on all available positions. */
2273 static void
2274 add_candidate (struct ivopts_data *data,
2275 tree base, tree step, bool important, struct iv_use *use)
2277 if (ip_normal_pos (data->current_loop))
2278 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2279 if (ip_end_pos (data->current_loop)
2280 && allow_ip_end_pos_p (data->current_loop))
2281 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2283 if (use != NULL && use->type == USE_ADDRESS)
2284 add_autoinc_candidates (data, base, step, important, use);
2287 /* Add a standard "0 + 1 * iteration" iv candidate for a
2288 type with SIZE bits. */
2290 static void
2291 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2292 unsigned int size)
2294 tree type = lang_hooks.types.type_for_size (size, true);
2295 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2296 true, NULL);
2299 /* Adds standard iv candidates. */
2301 static void
2302 add_standard_iv_candidates (struct ivopts_data *data)
2304 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2306 /* The same for a double-integer type if it is still fast enough. */
2307 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2308 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2312 /* Adds candidates bases on the old induction variable IV. */
2314 static void
2315 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2317 gimple phi;
2318 tree def;
2319 struct iv_cand *cand;
2321 add_candidate (data, iv->base, iv->step, true, NULL);
2323 /* The same, but with initial value zero. */
2324 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2325 add_candidate (data, size_int (0), iv->step, true, NULL);
2326 else
2327 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2328 iv->step, true, NULL);
2330 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2331 if (gimple_code (phi) == GIMPLE_PHI)
2333 /* Additionally record the possibility of leaving the original iv
2334 untouched. */
2335 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2336 cand = add_candidate_1 (data,
2337 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2338 SSA_NAME_DEF_STMT (def));
2339 cand->var_before = iv->ssa_name;
2340 cand->var_after = def;
2344 /* Adds candidates based on the old induction variables. */
2346 static void
2347 add_old_ivs_candidates (struct ivopts_data *data)
2349 unsigned i;
2350 struct iv *iv;
2351 bitmap_iterator bi;
2353 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2355 iv = ver_info (data, i)->iv;
2356 if (iv && iv->biv_p && !integer_zerop (iv->step))
2357 add_old_iv_candidates (data, iv);
2361 /* Adds candidates based on the value of the induction variable IV and USE. */
2363 static void
2364 add_iv_value_candidates (struct ivopts_data *data,
2365 struct iv *iv, struct iv_use *use)
2367 unsigned HOST_WIDE_INT offset;
2368 tree base;
2369 tree basetype;
2371 add_candidate (data, iv->base, iv->step, false, use);
2373 /* The same, but with initial value zero. Make such variable important,
2374 since it is generic enough so that possibly many uses may be based
2375 on it. */
2376 basetype = TREE_TYPE (iv->base);
2377 if (POINTER_TYPE_P (basetype))
2378 basetype = sizetype;
2379 add_candidate (data, build_int_cst (basetype, 0),
2380 iv->step, true, use);
2382 /* Third, try removing the constant offset. Make sure to even
2383 add a candidate for &a[0] vs. (T *)&a. */
2384 base = strip_offset (iv->base, &offset);
2385 if (offset
2386 || base != iv->base)
2387 add_candidate (data, base, iv->step, false, use);
2390 /* Adds candidates based on the uses. */
2392 static void
2393 add_derived_ivs_candidates (struct ivopts_data *data)
2395 unsigned i;
2397 for (i = 0; i < n_iv_uses (data); i++)
2399 struct iv_use *use = iv_use (data, i);
2401 if (!use)
2402 continue;
2404 switch (use->type)
2406 case USE_NONLINEAR_EXPR:
2407 case USE_COMPARE:
2408 case USE_ADDRESS:
2409 /* Just add the ivs based on the value of the iv used here. */
2410 add_iv_value_candidates (data, use->iv, use);
2411 break;
2413 default:
2414 gcc_unreachable ();
2419 /* Record important candidates and add them to related_cands bitmaps
2420 if needed. */
2422 static void
2423 record_important_candidates (struct ivopts_data *data)
2425 unsigned i;
2426 struct iv_use *use;
2428 for (i = 0; i < n_iv_cands (data); i++)
2430 struct iv_cand *cand = iv_cand (data, i);
2432 if (cand->important)
2433 bitmap_set_bit (data->important_candidates, i);
2436 data->consider_all_candidates = (n_iv_cands (data)
2437 <= CONSIDER_ALL_CANDIDATES_BOUND);
2439 if (data->consider_all_candidates)
2441 /* We will not need "related_cands" bitmaps in this case,
2442 so release them to decrease peak memory consumption. */
2443 for (i = 0; i < n_iv_uses (data); i++)
2445 use = iv_use (data, i);
2446 BITMAP_FREE (use->related_cands);
2449 else
2451 /* Add important candidates to the related_cands bitmaps. */
2452 for (i = 0; i < n_iv_uses (data); i++)
2453 bitmap_ior_into (iv_use (data, i)->related_cands,
2454 data->important_candidates);
2458 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2459 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2460 we allocate a simple list to every use. */
2462 static void
2463 alloc_use_cost_map (struct ivopts_data *data)
2465 unsigned i, size, s, j;
2467 for (i = 0; i < n_iv_uses (data); i++)
2469 struct iv_use *use = iv_use (data, i);
2470 bitmap_iterator bi;
2472 if (data->consider_all_candidates)
2473 size = n_iv_cands (data);
2474 else
2476 s = 0;
2477 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2479 s++;
2482 /* Round up to the power of two, so that moduling by it is fast. */
2483 for (size = 1; size < s; size <<= 1)
2484 continue;
2487 use->n_map_members = size;
2488 use->cost_map = XCNEWVEC (struct cost_pair, size);
2492 /* Returns description of computation cost of expression whose runtime
2493 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2495 static comp_cost
2496 new_cost (unsigned runtime, unsigned complexity)
2498 comp_cost cost;
2500 cost.cost = runtime;
2501 cost.complexity = complexity;
2503 return cost;
2506 /* Adds costs COST1 and COST2. */
2508 static comp_cost
2509 add_costs (comp_cost cost1, comp_cost cost2)
2511 cost1.cost += cost2.cost;
2512 cost1.complexity += cost2.complexity;
2514 return cost1;
2516 /* Subtracts costs COST1 and COST2. */
2518 static comp_cost
2519 sub_costs (comp_cost cost1, comp_cost cost2)
2521 cost1.cost -= cost2.cost;
2522 cost1.complexity -= cost2.complexity;
2524 return cost1;
2527 /* Returns a negative number if COST1 < COST2, a positive number if
2528 COST1 > COST2, and 0 if COST1 = COST2. */
2530 static int
2531 compare_costs (comp_cost cost1, comp_cost cost2)
2533 if (cost1.cost == cost2.cost)
2534 return cost1.complexity - cost2.complexity;
2536 return cost1.cost - cost2.cost;
2539 /* Returns true if COST is infinite. */
2541 static bool
2542 infinite_cost_p (comp_cost cost)
2544 return cost.cost == INFTY;
2547 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2548 on invariants DEPENDS_ON and that the value used in expressing it
2549 is VALUE. */
2551 static void
2552 set_use_iv_cost (struct ivopts_data *data,
2553 struct iv_use *use, struct iv_cand *cand,
2554 comp_cost cost, bitmap depends_on, tree value)
2556 unsigned i, s;
2558 if (infinite_cost_p (cost))
2560 BITMAP_FREE (depends_on);
2561 return;
2564 if (data->consider_all_candidates)
2566 use->cost_map[cand->id].cand = cand;
2567 use->cost_map[cand->id].cost = cost;
2568 use->cost_map[cand->id].depends_on = depends_on;
2569 use->cost_map[cand->id].value = value;
2570 return;
2573 /* n_map_members is a power of two, so this computes modulo. */
2574 s = cand->id & (use->n_map_members - 1);
2575 for (i = s; i < use->n_map_members; i++)
2576 if (!use->cost_map[i].cand)
2577 goto found;
2578 for (i = 0; i < s; i++)
2579 if (!use->cost_map[i].cand)
2580 goto found;
2582 gcc_unreachable ();
2584 found:
2585 use->cost_map[i].cand = cand;
2586 use->cost_map[i].cost = cost;
2587 use->cost_map[i].depends_on = depends_on;
2588 use->cost_map[i].value = value;
2591 /* Gets cost of (USE, CANDIDATE) pair. */
2593 static struct cost_pair *
2594 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2595 struct iv_cand *cand)
2597 unsigned i, s;
2598 struct cost_pair *ret;
2600 if (!cand)
2601 return NULL;
2603 if (data->consider_all_candidates)
2605 ret = use->cost_map + cand->id;
2606 if (!ret->cand)
2607 return NULL;
2609 return ret;
2612 /* n_map_members is a power of two, so this computes modulo. */
2613 s = cand->id & (use->n_map_members - 1);
2614 for (i = s; i < use->n_map_members; i++)
2615 if (use->cost_map[i].cand == cand)
2616 return use->cost_map + i;
2618 for (i = 0; i < s; i++)
2619 if (use->cost_map[i].cand == cand)
2620 return use->cost_map + i;
2622 return NULL;
2625 /* Returns estimate on cost of computing SEQ. */
2627 static unsigned
2628 seq_cost (rtx seq, bool speed)
2630 unsigned cost = 0;
2631 rtx set;
2633 for (; seq; seq = NEXT_INSN (seq))
2635 set = single_set (seq);
2636 if (set)
2637 cost += rtx_cost (set, SET,speed);
2638 else
2639 cost++;
2642 return cost;
2645 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2646 static rtx
2647 produce_memory_decl_rtl (tree obj, int *regno)
2649 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2650 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2651 rtx x;
2653 gcc_assert (obj);
2654 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2656 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2657 x = gen_rtx_SYMBOL_REF (address_mode, name);
2658 SET_SYMBOL_REF_DECL (x, obj);
2659 x = gen_rtx_MEM (DECL_MODE (obj), x);
2660 set_mem_addr_space (x, as);
2661 targetm.encode_section_info (obj, x, true);
2663 else
2665 x = gen_raw_REG (address_mode, (*regno)++);
2666 x = gen_rtx_MEM (DECL_MODE (obj), x);
2667 set_mem_addr_space (x, as);
2670 return x;
2673 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2674 walk_tree. DATA contains the actual fake register number. */
2676 static tree
2677 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2679 tree obj = NULL_TREE;
2680 rtx x = NULL_RTX;
2681 int *regno = (int *) data;
2683 switch (TREE_CODE (*expr_p))
2685 case ADDR_EXPR:
2686 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2687 handled_component_p (*expr_p);
2688 expr_p = &TREE_OPERAND (*expr_p, 0))
2689 continue;
2690 obj = *expr_p;
2691 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2692 x = produce_memory_decl_rtl (obj, regno);
2693 break;
2695 case SSA_NAME:
2696 *ws = 0;
2697 obj = SSA_NAME_VAR (*expr_p);
2698 if (!DECL_RTL_SET_P (obj))
2699 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2700 break;
2702 case VAR_DECL:
2703 case PARM_DECL:
2704 case RESULT_DECL:
2705 *ws = 0;
2706 obj = *expr_p;
2708 if (DECL_RTL_SET_P (obj))
2709 break;
2711 if (DECL_MODE (obj) == BLKmode)
2712 x = produce_memory_decl_rtl (obj, regno);
2713 else
2714 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2716 break;
2718 default:
2719 break;
2722 if (x)
2724 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2725 SET_DECL_RTL (obj, x);
2728 return NULL_TREE;
/* Determines cost of the computation of EXPR.  EXPR is expanded to RTL
   (using fake registers prepared by prepare_decl_rtl) and the resulting
   insn sequence is costed with seq_cost; SPEED selects optimize-for-speed
   vs. optimize-for-size costs.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_node (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  /* Temporarily pretend the function has normal frequency and force the
     hot/cold insn setting to SPEED, so expansion makes choices matching
     the requested cost model; restored below.  */
  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
  /* If expansion yielded a memory reference, account for the cost of the
     address computation as well.  */
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
			  TYPE_ADDR_SPACE (type), speed);

  return cost;
}
2762 /* Returns variable containing the value of candidate CAND at statement AT. */
2764 static tree
2765 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2767 if (stmt_after_increment (loop, cand, stmt))
2768 return cand->var_after;
2769 else
2770 return cand->var_before;
2773 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2774 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2777 tree_int_cst_sign_bit (const_tree t)
2779 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2780 unsigned HOST_WIDE_INT w;
2782 if (bitno < HOST_BITS_PER_WIDE_INT)
2783 w = TREE_INT_CST_LOW (t);
2784 else
2786 w = TREE_INT_CST_HIGH (t);
2787 bitno -= HOST_BITS_PER_WIDE_INT;
2790 return (w >> bitno) & 1;
2793 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2794 same precision that is at least as wide as the precision of TYPE, stores
2795 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2796 type of A and B. */
2798 static tree
2799 determine_common_wider_type (tree *a, tree *b)
2801 tree wider_type = NULL;
2802 tree suba, subb;
2803 tree atype = TREE_TYPE (*a);
2805 if (CONVERT_EXPR_P (*a))
2807 suba = TREE_OPERAND (*a, 0);
2808 wider_type = TREE_TYPE (suba);
2809 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2810 return atype;
2812 else
2813 return atype;
2815 if (CONVERT_EXPR_P (*b))
2817 subb = TREE_OPERAND (*b, 0);
2818 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2819 return atype;
2821 else
2822 return atype;
2824 *a = suba;
2825 *b = subb;
2826 return wider_type;
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
		     struct iv_use *use, struct iv_cand *cand, gimple at,
		     struct affine_tree_combination *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  double_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  var = var_at_stmt (loop, cand, at);
  /* All arithmetic below is carried out in the unsigned variant of
     UTYPE so that overflow is well defined.  */
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  /* RAT becomes the constant ratio ustep / cstep; without it the use
     cannot be expressed linearly in terms of the candidate.  */
  if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
	cstep_common = fold_convert (common_type, cstep);
      else
	cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  /* Combine: AFF = ubase - rat * (cbase [+ cstep]) computed in
     COMMON_TYPE, then converted to UUTYPE, plus rat * var.  */
  aff_combination_scale (&cbase_aff, double_int_neg (rat));
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}
2905 /* Determines the expression by that USE is expressed from induction variable
2906 CAND at statement AT in LOOP. The computation is unshared. */
2908 static tree
2909 get_computation_at (struct loop *loop,
2910 struct iv_use *use, struct iv_cand *cand, gimple at)
2912 aff_tree aff;
2913 tree type = TREE_TYPE (use->iv->base);
2915 if (!get_computation_aff (loop, use, cand, at, &aff))
2916 return NULL_TREE;
2917 unshare_aff_combination (&aff);
2918 return fold_convert (type, aff_combination_to_tree (&aff));
/* Determines the expression by that USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  Convenience wrapper that
   evaluates the computation at the use's own statement.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}
2930 /* Returns cost of addition in MODE. */
2932 static unsigned
2933 add_cost (enum machine_mode mode, bool speed)
2935 static unsigned costs[NUM_MACHINE_MODES];
2936 rtx seq;
2937 unsigned cost;
2939 if (costs[mode])
2940 return costs[mode];
2942 start_sequence ();
2943 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2944 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2945 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2946 NULL_RTX);
2947 seq = get_insns ();
2948 end_sequence ();
2950 cost = seq_cost (seq, speed);
2951 if (!cost)
2952 cost = 1;
2954 costs[mode] = cost;
2956 if (dump_file && (dump_flags & TDF_DETAILS))
2957 fprintf (dump_file, "Addition in %s costs %d\n",
2958 GET_MODE_NAME (mode), cost);
2959 return cost;
/* Entry in a hashtable of already known costs for multiplication.
   The pair (cst, mode) is the hash key; cost is the cached value.  */
struct mbc_entry
{
  HOST_WIDE_INT cst;		/* The constant to multiply by.  */
  enum machine_mode mode;	/* In mode.  */
  unsigned cost;		/* The cost.  */
};
2970 /* Counts hash value for the ENTRY. */
2972 static hashval_t
2973 mbc_entry_hash (const void *entry)
2975 const struct mbc_entry *e = (const struct mbc_entry *) entry;
2977 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2980 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2982 static int
2983 mbc_entry_eq (const void *entry1, const void *entry2)
2985 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2986 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2988 return (e1->mode == e2->mode
2989 && e1->cst == e2->cst);
2992 /* Returns cost of multiplication by constant CST in MODE. */
2994 unsigned
2995 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
2997 static htab_t costs;
2998 struct mbc_entry **cached, act;
2999 rtx seq;
3000 unsigned cost;
3002 if (!costs)
3003 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3005 act.mode = mode;
3006 act.cst = cst;
3007 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3008 if (*cached)
3009 return (*cached)->cost;
3011 *cached = XNEW (struct mbc_entry);
3012 (*cached)->mode = mode;
3013 (*cached)->cst = cst;
3015 start_sequence ();
3016 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3017 gen_int_mode (cst, mode), NULL_RTX, 0);
3018 seq = get_insns ();
3019 end_sequence ();
3021 cost = seq_cost (seq, speed);
3023 if (dump_file && (dump_flags & TDF_DETAILS))
3024 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3025 (int) cst, GET_MODE_NAME (mode), cost);
3027 (*cached)->cost = cost;
3029 return cost;
/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE in
   address space AS.  */

DEF_VEC_P (sbitmap);
DEF_VEC_ALLOC_P (sbitmap, heap);

bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
				 addr_space_t as)
{
#define MAX_RATIO 128
  /* Valid-multiplier bitmaps are cached per (address space, mode) pair;
     each bitmap has 2 * MAX_RATIO + 1 bits, index = ratio + MAX_RATIO.  */
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
  static VEC (sbitmap, heap) *valid_mult_list;
  sbitmap valid_mult;

  if (data_index >= VEC_length (sbitmap, valid_mult_list))
    VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);

  valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
  if (!valid_mult)
    {
      /* First query for this (as, mode): probe the target by asking
	 whether (mult reg i) is a valid address for each candidate i.  */
      enum machine_mode address_mode = targetm.addr_space.address_mode (as);
      rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      rtx addr;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      sbitmap_zero (valid_mult);
      addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	{
	  XEXP (addr, 1) = gen_int_mode (i, address_mode);
	  if (memory_address_addr_space_p (mode, addr, as))
	    SET_BIT (valid_mult, i + MAX_RATIO);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  allowed multipliers:");
	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	    if (TEST_BIT (valid_mult, i + MAX_RATIO))
	      fprintf (dump_file, " %d", (int) i);
	  fprintf (dump_file, "\n");
	  fprintf (dump_file, "\n");
	}

      VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
    }

  /* Ratios outside the probed range are conservatively rejected.  */
  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return TEST_BIT (valid_mult, ratio + MAX_RATIO);
}
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  Compute the cost for a memory reference that accesses
   a memory location of mode MEM_MODE in address space AS.

   MAY_AUTOINC is set to true if the autoincrement (increasing index by
   size of MEM_MODE / RATIO) is available.  To make this determination, we
   look at the size of the increment to be made, which is given in CSTEP.
   CSTEP may be zero if the step is unknown.
   STMT_AFTER_INC is true iff the statement we're looking at is after the
   increment of the original biv.

   TODO -- there must be some better way.  This all is quite crude.  */

/* Per-(address space, memory mode) cache of offset bounds and of the cost
   of each of the 16 sym/var/offset/ratio addressing-shape combinations.  */
typedef struct
{
  HOST_WIDE_INT min_offset, max_offset;
  unsigned costs[2][2][2][2];
} *address_cost_data;

DEF_VEC_P (address_cost_data);
DEF_VEC_ALLOC_P (address_cost_data, heap);

static comp_cost
get_address_cost (bool symbol_present, bool var_present,
		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
		  HOST_WIDE_INT cstep, enum machine_mode mem_mode,
		  addr_space_t as, bool speed,
		  bool stmt_after_inc, bool *may_autoinc)
{
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  static VEC(address_cost_data, heap) *address_cost_data_list;
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
  address_cost_data data;
  static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
  static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
  unsigned cost, acost, complexity;
  bool offset_p, ratio_p, autoinc;
  HOST_WIDE_INT s_offset, autoinc_offset, msize;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
    VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
			   data_index + 1);

  data = VEC_index (address_cost_data, address_cost_data_list, data_index);
  if (!data)
    {
      /* First query for this (as, mem_mode): probe the target and fill
	 the cache.  */
      HOST_WIDE_INT i;
      HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
      HOST_WIDE_INT rat, off;
      int old_cse_not_expected;
      unsigned sym_p, var_p, off_p, rat_p, add_c;
      rtx seq, addr, base;
      rtx reg0, reg1;

      data = (address_cost_data) xcalloc (1, sizeof (*data));

      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);

      /* Determine the maximum valid positive offset by doubling until
	 (plus reg i) stops being a valid address (capped at 1 << 20).  */
      addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
      for (i = start; i <= 1 << 20; i <<= 1)
	{
	  XEXP (addr, 1) = gen_int_mode (i, address_mode);
	  if (!memory_address_addr_space_p (mem_mode, addr, as))
	    break;
	}
      data->max_offset = i == start ? 0 : i >> 1;
      off = data->max_offset;

      /* Likewise for the most negative valid offset.  */
      for (i = start; i <= 1 << 20; i <<= 1)
	{
	  XEXP (addr, 1) = gen_int_mode (-i, address_mode);
	  if (!memory_address_addr_space_p (mem_mode, addr, as))
	    break;
	}
      data->min_offset = i == start ? 0 : -(i >> 1);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "get_address_cost:\n");
	  fprintf (dump_file, "  min offset %s %d\n",
		   GET_MODE_NAME (mem_mode),
		   (int) data->min_offset);
	  fprintf (dump_file, "  max offset %s %d\n",
		   GET_MODE_NAME (mem_mode),
		   (int) data->max_offset);
	}

      /* Find the smallest index multiplier (>= 2) the target accepts,
	 to use as a representative in the probes below.  */
      rat = 1;
      for (i = 2; i <= MAX_RATIO; i++)
	if (multiplier_allowed_in_address_p (i, mem_mode, as))
	  {
	    rat = i;
	    break;
	  }

      /* Compute the cost of various addressing modes.  */
      acost = 0;
      reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);

      /* Record which auto-inc/dec address forms the target supports for
	 this memory mode.  */
      if (HAVE_PRE_DECREMENT)
	{
	  addr = gen_rtx_PRE_DEC (address_mode, reg0);
	  has_predec[mem_mode]
	    = memory_address_addr_space_p (mem_mode, addr, as);
	}
      if (HAVE_POST_DECREMENT)
	{
	  addr = gen_rtx_POST_DEC (address_mode, reg0);
	  has_postdec[mem_mode]
	    = memory_address_addr_space_p (mem_mode, addr, as);
	}
      if (HAVE_PRE_INCREMENT)
	{
	  addr = gen_rtx_PRE_INC (address_mode, reg0);
	  has_preinc[mem_mode]
	    = memory_address_addr_space_p (mem_mode, addr, as);
	}
      if (HAVE_POST_INCREMENT)
	{
	  addr = gen_rtx_POST_INC (address_mode, reg0);
	  has_postinc[mem_mode]
	    = memory_address_addr_space_p (mem_mode, addr, as);
	}
      /* Cost all 16 combinations of {symbol, var, offset, ratio}
	 presence; bit i selects which parts appear in the address.  */
      for (i = 0; i < 16; i++)
	{
	  sym_p = i & 1;
	  var_p = (i >> 1) & 1;
	  off_p = (i >> 2) & 1;
	  rat_p = (i >> 3) & 1;

	  addr = reg0;
	  if (rat_p)
	    addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
				   gen_int_mode (rat, address_mode));

	  if (var_p)
	    addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);

	  if (sym_p)
	    {
	      base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
	      /* ??? We can run into trouble with some backends by presenting
		 it with symbols which haven't been properly passed through
		 targetm.encode_section_info.  By setting the local bit, we
		 enhance the probability of things working.  */
	      SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;

	      if (off_p)
		base = gen_rtx_fmt_e (CONST, address_mode,
				      gen_rtx_fmt_ee
					(PLUS, address_mode, base,
					 gen_int_mode (off, address_mode)));
	    }
	  else if (off_p)
	    base = gen_int_mode (off, address_mode);
	  else
	    base = NULL_RTX;

	  if (base)
	    addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);

	  start_sequence ();
	  /* To avoid splitting addressing modes, pretend that no cse will
	     follow.  */
	  old_cse_not_expected = cse_not_expected;
	  cse_not_expected = true;
	  addr = memory_address_addr_space (mem_mode, addr, as);
	  cse_not_expected = old_cse_not_expected;
	  seq = get_insns ();
	  end_sequence ();

	  acost = seq_cost (seq, speed);
	  acost += address_cost (addr, mem_mode, as, speed);

	  if (!acost)
	    acost = 1;
	  data->costs[sym_p][var_p][off_p][rat_p] = acost;
	}

      /* On some targets, it is quite expensive to load symbol to a register,
	 which makes addresses that contain symbols look much more expensive.
	 However, the symbol will have to be loaded in any case before the
	 loop (and quite likely we have it in register already), so it does not
	 make much sense to penalize them too heavily.  So make some final
	 tweaks for the SYMBOL_PRESENT modes:

	 If VAR_PRESENT is false, and the mode obtained by changing symbol to
	 var is cheaper, use this mode with small penalty.
	 If VAR_PRESENT is true, try whether the mode with
	 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
	 if this is the case, use it.  */
      add_c = add_cost (address_mode, speed);
      for (i = 0; i < 8; i++)
	{
	  var_p = i & 1;
	  off_p = (i >> 1) & 1;
	  rat_p = (i >> 2) & 1;

	  acost = data->costs[0][1][off_p][rat_p] + 1;
	  if (var_p)
	    acost += add_c;

	  if (acost < data->costs[1][var_p][off_p][rat_p])
	    data->costs[1][var_p][off_p][rat_p] = acost;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Address costs:\n");

	  for (i = 0; i < 16; i++)
	    {
	      sym_p = i & 1;
	      var_p = (i >> 1) & 1;
	      off_p = (i >> 2) & 1;
	      rat_p = (i >> 3) & 1;

	      fprintf (dump_file, "  ");
	      if (sym_p)
		fprintf (dump_file, "sym + ");
	      if (var_p)
		fprintf (dump_file, "var + ");
	      if (off_p)
		fprintf (dump_file, "cst + ");
	      if (rat_p)
		fprintf (dump_file, "rat * ");

	      acost = data->costs[sym_p][var_p][off_p][rat_p];
	      fprintf (dump_file, "index costs %d\n", acost);
	    }
	  if (has_predec[mem_mode] || has_postdec[mem_mode]
	      || has_preinc[mem_mode] || has_postinc[mem_mode])
	    fprintf (dump_file, "  May include autoinc/dec\n");
	  fprintf (dump_file, "\n");
	}

      VEC_replace (address_cost_data, address_cost_data_list,
		   data_index, data);
    }

  /* Sign-extend OFFSET to the width of the address mode.  */
  bits = GET_MODE_BITSIZE (address_mode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  /* Decide whether the access matches one of the supported
     auto-increment/decrement forms: plain register address whose step
     equals +/- the access size and whose effective offset lines up.  */
  autoinc = false;
  msize = GET_MODE_SIZE (mem_mode);
  autoinc_offset = offset;
  if (stmt_after_inc)
    autoinc_offset += ratio * cstep;
  if (symbol_present || var_present || ratio != 1)
    autoinc = false;
  else if ((has_postinc[mem_mode] && autoinc_offset == 0
	   && msize == cstep)
	   || (has_postdec[mem_mode] && autoinc_offset == 0
	       && msize == -cstep)
	   || (has_preinc[mem_mode] && autoinc_offset == msize
	       && msize == cstep)
	   || (has_predec[mem_mode] && autoinc_offset == -msize
	       && msize == -cstep))
    autoinc = true;

  cost = 0;
  offset_p = (s_offset != 0
	      && data->min_offset <= s_offset
	      && s_offset <= data->max_offset);
  ratio_p = (ratio != 1
	     && multiplier_allowed_in_address_p (ratio, mem_mode, as));

  /* Parts that cannot be folded into the address must be computed
     separately; charge their cost here.  */
  if (ratio != 1 && !ratio_p)
    cost += multiply_by_cost (ratio, address_mode, speed);

  if (s_offset && !offset_p && !symbol_present)
    cost += add_cost (address_mode, speed);

  if (may_autoinc)
    *may_autoinc = autoinc;
  acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
  complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
  return new_cost (cost + acost, complexity);
}
/* Estimates cost of forcing expression EXPR into a variable.  SPEED selects
   the optimize-for-speed vs. optimize-for-size cost model.  The cost of
   representative leaf expressions (integer constant, symbol, address) is
   measured once per compilation and cached.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      /* Measure the costs of representative constants, symbols and
	 addresses for both cost models (index 0 = size, 1 = speed).  */
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);


      for (i = 0; i < 2; i++)
	{
	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
							     2000), i);

	  symbol_cost[i] = computation_cost (addr, i) + 1;

	  address_cost[i]
	    = computation_cost (build2 (POINTER_PLUS_EXPR, type,
					addr,
					build_int_cst (sizetype, 2000)), i) + 1;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
	      fprintf (dump_file, "\n");
	    }
	}

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  /* SSA names and variables are free; they already live in registers.  */
  if (SSA_VAR_P (expr))
    return zero_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
	return new_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
	{
	  tree obj = TREE_OPERAND (expr, 0);

	  /* Taking the address of a plain decl is a symbol reference;
	     other invariant addresses fall through to address_cost.  */
	  if (TREE_CODE (obj) == VAR_DECL
	      || TREE_CODE (obj) == PARM_DECL
	      || TREE_CODE (obj) == RESULT_DECL)
	    return new_cost (symbol_cost [speed], 0);
	}

      return new_cost (address_cost [speed], 0);
    }

  /* First switch: recursively cost the operands of supported binary and
     unary expressions.  */
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);

      if (is_gimple_val (op0))
	cost0 = zero_cost;
      else
	cost0 = force_expr_to_var_cost (op0, speed);

      if (is_gimple_val (op1))
	cost1 = zero_cost;
      else
	cost1 = force_expr_to_var_cost (op1, speed);

      break;

    case NEGATE_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      STRIP_NOPS (op0);
      op1 = NULL_TREE;

      if (is_gimple_val (op0))
	cost0 = zero_cost;
      else
	cost0 = force_expr_to_var_cost (op0, speed);

      cost1 = zero_cost;
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return new_cost (target_spill_cost[speed], 0);
    }

  /* Second switch: cost of the operation itself.  */
  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = new_cost (add_cost (mode, speed), 0);
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
	cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
	cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
      else
	return new_cost (target_spill_cost [speed], 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost = add_costs (cost, cost0);
  cost = add_costs (cost, cost1);

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  if (cost.cost > (int) target_spill_cost [speed])
    cost.cost = target_spill_cost [speed];

  return cost;
}
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on; it is updated (when non-NULL) with
   the invariants referenced by EXPR.  */

static comp_cost
force_var_cost (struct ivopts_data *data,
		tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      /* find_depends takes its ivopts context through the global
	 fd_ivopts_data, set up here before the walk.  */
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr, data->speed);
}
3545 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3546 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3547 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3548 invariants the computation depends on. */
3550 static comp_cost
3551 split_address_cost (struct ivopts_data *data,
3552 tree addr, bool *symbol_present, bool *var_present,
3553 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3555 tree core;
3556 HOST_WIDE_INT bitsize;
3557 HOST_WIDE_INT bitpos;
3558 tree toffset;
3559 enum machine_mode mode;
3560 int unsignedp, volatilep;
3562 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3563 &unsignedp, &volatilep, false);
3565 if (toffset != 0
3566 || bitpos % BITS_PER_UNIT != 0
3567 || TREE_CODE (core) != VAR_DECL)
3569 *symbol_present = false;
3570 *var_present = true;
3571 fd_ivopts_data = data;
3572 walk_tree (&addr, find_depends, depends_on, NULL);
3573 return new_cost (target_spill_cost[data->speed], 0);
3576 *offset += bitpos / BITS_PER_UNIT;
3577 if (TREE_STATIC (core)
3578 || DECL_EXTERNAL (core))
3580 *symbol_present = true;
3581 *var_present = false;
3582 return zero_cost;
3585 *symbol_present = false;
3586 *var_present = true;
3587 return zero_cost;
3590 /* Estimates cost of expressing difference of addresses E1 - E2 as
3591    var + symbol + offset.  The value of offset is added to OFFSET,
3592    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3593    part is missing.  DEPENDS_ON is a set of the invariants the computation
3594    depends on.  */
3596 static comp_cost
3597 ptr_difference_cost (struct ivopts_data *data,
3598 tree e1, tree e2, bool *symbol_present, bool *var_present,
3599 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3601 HOST_WIDE_INT diff = 0;
3602 aff_tree aff_e1, aff_e2;
3603 tree type;
3605 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
/* If the difference folds to a compile-time constant, it goes entirely
   into the offset part and costs nothing at runtime.  */
3607 if (ptr_difference_const (e1, e2, &diff))
3609 *offset += diff;
3610 *symbol_present = false;
3611 *var_present = false;
3612 return zero_cost;
/* E1 - 0 is just the address E1; let split_address_cost break it into
   its symbol/var/offset parts.  */
3615 if (integer_zerop (e2))
3616 return split_address_cost (data, TREE_OPERAND (e1, 0),
3617 symbol_present, var_present, offset, depends_on);
3619 *symbol_present = false;
3620 *var_present = true;
/* General case: compute E1 - E2 as an affine combination in a signed
   type (E2 scaled by -1, then added) and charge the cost of forcing the
   result into a variable.  */
3622 type = signed_type_for (TREE_TYPE (e1));
3623 tree_to_aff_combination (e1, type, &aff_e1);
3624 tree_to_aff_combination (e2, type, &aff_e2);
3625 aff_combination_scale (&aff_e2, double_int_minus_one);
3626 aff_combination_add (&aff_e1, &aff_e2);
3628 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3631 /* Estimates cost of expressing difference E1 - E2 as
3632    var + symbol + offset.  The value of offset is added to OFFSET,
3633    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3634    part is missing.  DEPENDS_ON is a set of the invariants the computation
3635    depends on.  */
3637 static comp_cost
3638 difference_cost (struct ivopts_data *data,
3639 tree e1, tree e2, bool *symbol_present, bool *var_present,
3640 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3642 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3643 unsigned HOST_WIDE_INT off1, off2;
3644 aff_tree aff_e1, aff_e2;
3645 tree type;
/* Peel constant offsets off both operands first; their difference is
   folded directly into *OFFSET.  */
3647 e1 = strip_offset (e1, &off1);
3648 e2 = strip_offset (e2, &off2);
3649 *offset += off1 - off2;
3651 STRIP_NOPS (e1);
3652 STRIP_NOPS (e2);
/* Differences of addresses are handled by the specialized helper.  */
3654 if (TREE_CODE (e1) == ADDR_EXPR)
3655 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3656 offset, depends_on);
3657 *symbol_present = false;
/* Identical operands cancel: the difference is the (already accumulated)
   constant offset alone.  */
3659 if (operand_equal_p (e1, e2, 0))
3661 *var_present = false;
3662 return zero_cost;
3665 *var_present = true;
3667 if (integer_zerop (e2))
3668 return force_var_cost (data, e1, depends_on);
/* 0 - E2 needs a negation on top of materializing E2.  */
3670 if (integer_zerop (e1))
3672 comp_cost cost = force_var_cost (data, e2, depends_on);
3673 cost.cost += multiply_by_cost (-1, mode, data->speed);
3674 return cost;
/* General case: build E1 - E2 as an affine combination in a signed type
   and charge the cost of forcing it into a variable.  */
3677 type = signed_type_for (TREE_TYPE (e1));
3678 tree_to_aff_combination (e1, type, &aff_e1);
3679 tree_to_aff_combination (e2, type, &aff_e2);
3680 aff_combination_scale (&aff_e2, double_int_minus_one);
3681 aff_combination_add (&aff_e1, &aff_e2);
3683 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3686 /* Determines the cost of the computation by that USE is expressed
3687    from induction variable CAND.  If ADDRESS_P is true, we just need
3688    to create an address from it, otherwise we want to get it into
3689    register.  A set of invariants we depend on is stored in
3690    DEPENDS_ON.  AT is the statement at that the value is computed.
3691    If CAN_AUTOINC is nonnull, use it to record whether autoinc
3692    addressing is likely.  */
3694 static comp_cost
3695 get_computation_cost_at (struct ivopts_data *data,
3696 struct iv_use *use, struct iv_cand *cand,
3697 bool address_p, bitmap *depends_on, gimple at,
3698 bool *can_autoinc)
3700 tree ubase = use->iv->base, ustep = use->iv->step;
3701 tree cbase, cstep;
3702 tree utype = TREE_TYPE (ubase), ctype;
3703 unsigned HOST_WIDE_INT cstepi, offset = 0;
3704 HOST_WIDE_INT ratio, aratio;
3705 bool var_present, symbol_present, stmt_is_after_inc;
3706 comp_cost cost;
3707 double_int rat;
3708 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3710 *depends_on = NULL;
3712 /* Only consider real candidates.  */
3713 if (!cand->iv)
3714 return infinite_cost;
3716 cbase = cand->iv->base;
3717 cstep = cand->iv->step;
3718 ctype = TREE_TYPE (cbase);
3720 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3722 /* We do not have a precision to express the values of use.  */
3723 return infinite_cost;
3726 if (address_p)
3728 /* Do not try to express address of an object with computation based
3729 on address of a different object.  This may cause problems in rtl
3730 level alias analysis (that does not expect this to be happening,
3731 as this is illegal in C), and would be unlikely to be useful
3732 anyway.  */
3733 if (use->iv->base_object
3734 && cand->iv->base_object
3735 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3736 return infinite_cost;
3739 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3741 /* TODO -- add direct handling of this case.  */
3742 goto fallback;
3745 /* CSTEPI is removed from the offset in case statement is after the
3746 increment.  If the step is not constant, we use zero instead.
3747 This is a bit imprecise (there is the extra addition), but
3748 redundancy elimination is likely to transform the code so that
3749 it uses value of the variable before increment anyway,
3750 so it is not that much unrealistic.  */
3751 if (cst_and_fits_in_hwi (cstep))
3752 cstepi = int_cst_value (cstep);
3753 else
3754 cstepi = 0;
/* RATIO = USTEP / CSTEP must be an integer constant that fits in a
   signed HOST_WIDE_INT, otherwise USE cannot be cheaply based on CAND.  */
3756 if (!constant_multiple_of (ustep, cstep, &rat))
3757 return infinite_cost;
3759 if (double_int_fits_in_shwi_p (rat))
3760 ratio = double_int_to_shwi (rat);
3761 else
3762 return infinite_cost;
3764 STRIP_NOPS (cbase);
3765 ctype = TREE_TYPE (cbase);
3767 /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3768 or ratio == 1, it is better to handle this like
3770 ubase - ratio * cbase + ratio * var
3772 (also holds in the case ratio == -1, TODO.  */
3774 if (cst_and_fits_in_hwi (cbase))
3776 offset = - ratio * int_cst_value (cbase);
3777 cost = difference_cost (data,
3778 ubase, build_int_cst (utype, 0),
3779 &symbol_present, &var_present, &offset,
3780 depends_on);
3782 else if (ratio == 1)
3784 cost = difference_cost (data,
3785 ubase, cbase,
3786 &symbol_present, &var_present, &offset,
3787 depends_on);
/* For addresses, a multiplication by RATIO may be folded into the
   addressing mode for free if the target supports it.  */
3789 else if (address_p
3790 && !POINTER_TYPE_P (ctype)
3791 && multiplier_allowed_in_address_p
3792 (ratio, TYPE_MODE (TREE_TYPE (utype)),
3793 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
3795 cbase
3796 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
3797 cost = difference_cost (data,
3798 ubase, cbase,
3799 &symbol_present, &var_present, &offset,
3800 depends_on);
/* Otherwise materialize CBASE separately and add it to the cost of
   computing UBASE.  */
3802 else
3804 cost = force_var_cost (data, cbase, depends_on);
3805 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3806 cost = add_costs (cost,
3807 difference_cost (data,
3808 ubase, build_int_cst (utype, 0),
3809 &symbol_present, &var_present,
3810 &offset, depends_on));
3813 /* If we are after the increment, the value of the candidate is higher by
3814 one iteration.  */
3815 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
3816 if (stmt_is_after_inc)
3817 offset -= ratio * cstepi;
3819 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3820 (symbol/var1/const parts may be omitted).  If we are looking for an
3821 address, find the cost of addressing this.  */
3822 if (address_p)
3823 return add_costs (cost,
3824 get_address_cost (symbol_present, var_present,
3825 offset, ratio, cstepi,
3826 TYPE_MODE (TREE_TYPE (utype)),
3827 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
3828 speed, stmt_is_after_inc,
3829 can_autoinc));
3831 /* Otherwise estimate the costs for computing the expression.  */
3832 if (!symbol_present && !var_present && !offset)
3834 if (ratio != 1)
3835 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3836 return cost;
3839 /* Symbol + offset should be compile-time computable so consider that they
3840 are added once to the variable, if present.  */
3841 if (var_present && (symbol_present || offset))
3842 cost.cost += add_cost (TYPE_MODE (ctype), speed)
3843 / AVG_LOOP_NITER (data->current_loop);
3845 /* Having offset does not affect runtime cost in case it is added to
3846 symbol, but it increases complexity.  */
3847 if (offset)
3848 cost.complexity++;
3850 cost.cost += add_cost (TYPE_MODE (ctype), speed);
/* Multiplication cost depends only on the magnitude of the ratio.  */
3852 aratio = ratio > 0 ? ratio : -ratio;
3853 if (aratio != 1)
3854 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3856 fallback:
3857 if (can_autoinc)
3858 *can_autoinc = false;
3861 /* Just get the expression, expand it and measure the cost.  */
3862 tree comp = get_computation_at (data->current_loop, use, cand, at);
3864 if (!comp)
3865 return infinite_cost;
3867 if (address_p)
3868 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3870 return new_cost (computation_cost (comp, speed), 0);
3874 /* Determines the cost of the computation by that USE is expressed
3875    from induction variable CAND.  If ADDRESS_P is true, we just need
3876    to create an address from it, otherwise we want to get it into
3877    register.  A set of invariants we depend on is stored in
3878    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
3879    autoinc addressing is likely.  */
/* Convenience wrapper: evaluate the cost at the use's own statement.  */
3881 static comp_cost
3882 get_computation_cost (struct ivopts_data *data,
3883 struct iv_use *use, struct iv_cand *cand,
3884 bool address_p, bitmap *depends_on, bool *can_autoinc)
3886 return get_computation_cost_at (data,
3887 use, cand, address_p, depends_on, use->stmt,
3888 can_autoinc);
3891 /* Determines cost of basing replacement of USE on CAND in a generic
3892    expression.  Returns false iff the cost is infinite.  */
3894 static bool
3895 determine_use_iv_cost_generic (struct ivopts_data *data,
3896 struct iv_use *use, struct iv_cand *cand)
3898 bitmap depends_on;
3899 comp_cost cost;
3901 /* The simple case first -- if we need to express value of the preserved
3902 original biv, the cost is 0.  This also prevents us from counting the
3903 cost of increment twice -- once at this use and once in the cost of
3904 the candidate.  */
3905 if (cand->pos == IP_ORIGINAL
3906 && cand->incremented_at == use->stmt)
3908 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3909 return true;
/* Otherwise record the cost of computing the use from the candidate.  */
3912 cost = get_computation_cost (data, use, cand, false, &depends_on, NULL);
3913 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3915 return !infinite_cost_p (cost);
3918 /* Determines cost of basing replacement of USE on CAND in an address.
     Returns false iff the cost is infinite.  */
3920 static bool
3921 determine_use_iv_cost_address (struct ivopts_data *data,
3922 struct iv_use *use, struct iv_cand *cand)
3924 bitmap depends_on;
3925 bool can_autoinc;
3926 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
3927 &can_autoinc);
/* If CAND was created with this use as its autoincrement use, adjust
   the cost accordingly.  */
3929 if (cand->ainc_use == use)
/* Autoincrement makes the candidate's step effectively free here.  */
3931 if (can_autoinc)
3932 cost.cost -= cand->cost_step;
3933 /* If we generated the candidate solely for exploiting autoincrement
3934 opportunities, and it turns out it can't be used, set the cost to
3935 infinity to make sure we ignore it.  */
3936 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
3937 cost = infinite_cost;
3939 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3941 return !infinite_cost_p (cost);
3944 /* Computes value of candidate CAND at position AT in iteration NITER, and
3945    stores it to VAL.  */
3947 static void
3948 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3949 aff_tree *val)
3951 aff_tree step, delta, nit;
3952 struct iv *iv = cand->iv;
3953 tree type = TREE_TYPE (iv->base);
3954 tree steptype = type;
/* Pointer arithmetic is carried out in sizetype.  */
3955 if (POINTER_TYPE_P (type))
3956 steptype = sizetype;
/* DELTA = NITER * STEP, i.e. the total advance over NITER iterations.  */
3958 tree_to_aff_combination (iv->step, steptype, &step);
3959 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3960 aff_combination_convert (&nit, steptype);
3961 aff_combination_mult (&nit, &step, &delta);
/* Past the increment, the candidate has advanced one extra step.  */
3962 if (stmt_after_increment (loop, cand, at))
3963 aff_combination_add (&delta, &step);
3965 tree_to_aff_combination (iv->base, type, val);
3966 aff_combination_add (val, &delta);
3969 /* Returns period of induction variable iv, i.e. the number of distinct
     values the iv takes before wrapping back to its start.  */
3971 static tree
3972 iv_period (struct iv *iv)
3974 tree step = iv->step, period, type;
3975 tree pow2div;
3977 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3979 /* Period of the iv is gcd (step, type range).  Since type range is power
3980 of two, it suffices to determine the maximum power of two that divides
3981 step.  */
3982 pow2div = num_ending_zeros (step);
3983 type = unsigned_type_for (TREE_TYPE (step));
/* period = 2^(precision - pow2div) - 1, expressed as a low-bits mask.  */
3985 period = build_low_bits_mask (type,
3986 (TYPE_PRECISION (type)
3987 - tree_low_cst (pow2div, 1)));
3989 return period;
3992 /* Returns the comparison operator used when eliminating the iv USE.  */
3994 static enum tree_code
3995 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3997 struct loop *loop = data->current_loop;
3998 basic_block ex_bb;
3999 edge exit;
/* Find which of the two successor edges of the condition leaves the
   loop.  */
4001 ex_bb = gimple_bb (use->stmt);
4002 exit = EDGE_SUCC (ex_bb, 0);
4003 if (flow_bb_inside_loop_p (loop, exit->dest))
4004 exit = EDGE_SUCC (ex_bb, 1);
/* Exit on "true" means the loop continues while the comparison is
   false, so the eliminated test is equality; otherwise inequality.  */
4006 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4009 /* Check whether it is possible to express the condition in USE by comparison
4010    of candidate CAND.  If so, store the value compared with to BOUND.  */
4012 static bool
4013 may_eliminate_iv (struct ivopts_data *data,
4014 struct iv_use *use, struct iv_cand *cand, tree *bound)
4016 basic_block ex_bb;
4017 edge exit;
4018 tree nit, period;
4019 struct loop *loop = data->current_loop;
4020 aff_tree bnd;
/* The period computation below requires a constant step.  */
4022 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4023 return false;
4025 /* For now works only for exits that dominate the loop latch.
4026 TODO: extend to other conditions inside loop body.  */
4027 ex_bb = gimple_bb (use->stmt);
4028 if (use->stmt != last_stmt (ex_bb)
4029 || gimple_code (use->stmt) != GIMPLE_COND
4030 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4031 return false;
/* Identify the edge leaving the loop; both edges staying inside means
   this is not an exit condition at all.  */
4033 exit = EDGE_SUCC (ex_bb, 0);
4034 if (flow_bb_inside_loop_p (loop, exit->dest))
4035 exit = EDGE_SUCC (ex_bb, 1);
4036 if (flow_bb_inside_loop_p (loop, exit->dest))
4037 return false;
4039 nit = niter_for_exit (data, exit);
4040 if (!nit)
4041 return false;
4043 /* Determine whether we can use the variable to test the exit condition.
4044 This is the case iff the period of the induction variable is greater
4045 than the number of iterations for which the exit condition is true.  */
4046 period = iv_period (cand->iv);
4048 /* If the number of iterations is constant, compare against it directly.  */
4049 if (TREE_CODE (nit) == INTEGER_CST)
4051 if (!tree_int_cst_lt (nit, period))
4052 return false;
4055 /* If not, and if this is the only possible exit of the loop, see whether
4056 we can get a conservative estimate on the number of iterations of the
4057 entire loop and compare against that instead.  */
4058 else if (loop_only_exit_p (loop, exit))
4060 double_int period_value, max_niter;
4061 if (!estimated_loop_iterations (loop, true, &max_niter))
4062 return false;
4063 period_value = tree_to_double_int (period);
4064 if (double_int_ucmp (max_niter, period_value) >= 0)
4065 return false;
4068 /* Otherwise, punt.  */
4069 else
4070 return false;
/* The bound is the candidate's value after NIT iterations.  */
4072 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4074 *bound = aff_combination_to_tree (&bnd);
4075 /* It is unlikely that computing the number of iterations using division
4076 would be more profitable than keeping the original induction variable.  */
4077 if (expression_expensive_p (*bound))
4078 return false;
4079 return true;
4082 /* Determines cost of basing replacement of USE on CAND in a condition.
     Returns false iff the resulting cost is infinite.  */
4084 static bool
4085 determine_use_iv_cost_condition (struct ivopts_data *data,
4086 struct iv_use *use, struct iv_cand *cand)
4088 tree bound = NULL_TREE;
4089 struct iv *cmp_iv;
4090 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4091 comp_cost elim_cost, express_cost, cost;
4092 bool ok;
4093 tree *control_var, *bound_cst;
4095 /* Only consider real candidates.  */
4096 if (!cand->iv)
4098 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
4099 return false;
4102 /* Try iv elimination.  */
4103 if (may_eliminate_iv (data, use, cand, &bound))
4105 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4106 /* The bound is a loop invariant, so it will be only computed
4107 once.  */
4108 elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
4110 else
4111 elim_cost = infinite_cost;
4113 /* Try expressing the original giv.  If it is compared with an invariant,
4114 note that we cannot get rid of it.  */
4115 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4116 NULL, &cmp_iv);
4117 gcc_assert (ok);
4119 /* When the condition is a comparison of the candidate IV against
4120 zero, prefer this IV.
4122 TODO: The constant that we're substracting from the cost should
4123 be target-dependent.  This information should be added to the
4124 target costs for each backend.  */
4125 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4126 && integer_zerop (*bound_cst)
4127 && (operand_equal_p (*control_var, cand->var_after, 0)
4128 || operand_equal_p (*control_var, cand->var_before, 0)))
4129 elim_cost.cost -= 1;
4131 express_cost = get_computation_cost (data, use, cand, false,
4132 &depends_on_express, NULL);
/* The compared-with iv's base adds its own invariant dependencies.  */
4133 fd_ivopts_data = data;
4134 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4136 /* Choose the better approach, preferring the eliminated IV.  */
4137 if (compare_costs (elim_cost, express_cost) <= 0)
4139 cost = elim_cost;
4140 depends_on = depends_on_elim;
/* Ownership of the bitmap transfers to set_use_iv_cost; clear the
   local pointer so it is not freed below.  */
4141 depends_on_elim = NULL;
4143 else
4145 cost = express_cost;
4146 depends_on = depends_on_express;
4147 depends_on_express = NULL;
/* No BOUND recorded means "express", not "eliminate".  */
4148 bound = NULL_TREE;
4151 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
/* Free whichever dependency set was not handed over.  */
4153 if (depends_on_elim)
4154 BITMAP_FREE (depends_on_elim);
4155 if (depends_on_express)
4156 BITMAP_FREE (depends_on_express);
4158 return !infinite_cost_p (cost);
4161 /* Determines cost of basing replacement of USE on CAND.  Returns false
4162    if USE cannot be based on CAND.  */
/* Dispatch on the kind of use to the matching cost routine.  */
4164 static bool
4165 determine_use_iv_cost (struct ivopts_data *data,
4166 struct iv_use *use, struct iv_cand *cand)
4168 switch (use->type)
4170 case USE_NONLINEAR_EXPR:
4171 return determine_use_iv_cost_generic (data, use, cand);
4173 case USE_ADDRESS:
4174 return determine_use_iv_cost_address (data, use, cand);
4176 case USE_COMPARE:
4177 return determine_use_iv_cost_condition (data, use, cand);
4179 default:
4180 gcc_unreachable ();
4184 /* Return true if get_computation_cost indicates that autoincrement is
4185    a possibility for the pair of USE and CAND, false otherwise.  */
4187 static bool
4188 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4189 struct iv_cand *cand)
4191 bitmap depends_on;
4192 bool can_autoinc;
4193 comp_cost cost;
/* Autoincrement only applies to address uses.  */
4195 if (use->type != USE_ADDRESS)
4196 return false;
4198 cost = get_computation_cost (data, use, cand, true, &depends_on,
4199 &can_autoinc);
/* Only the CAN_AUTOINC flag is of interest here, not the dependencies.  */
4201 BITMAP_FREE (depends_on);
4203 return !infinite_cost_p (cost) && can_autoinc;
4206 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4207    use that allows autoincrement, and set their AINC_USE if possible.  */
4209 static void
4210 set_autoinc_for_original_candidates (struct ivopts_data *data)
4212 unsigned i, j;
4214 for (i = 0; i < n_iv_cands (data); i++)
4216 struct iv_cand *cand = iv_cand (data, i);
4217 struct iv_use *closest = NULL;
4218 if (cand->pos != IP_ORIGINAL)
4219 continue;
/* Find the use in the same block as the increment that is nearest
   before it (largest statement uid not exceeding the increment's).  */
4220 for (j = 0; j < n_iv_uses (data); j++)
4222 struct iv_use *use = iv_use (data, j);
4223 unsigned uid = gimple_uid (use->stmt);
4224 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4225 || uid > gimple_uid (cand->incremented_at))
4226 continue;
4227 if (closest == NULL || uid > gimple_uid (closest->stmt))
4228 closest = use;
4230 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4231 continue;
4232 cand->ainc_use = closest;
4236 /* Finds the candidates for the induction variables.  */
4238 static void
4239 find_iv_candidates (struct ivopts_data *data)
4241 /* Add commonly used ivs.  */
4242 add_standard_iv_candidates (data);
4244 /* Add old induction variables.  */
4245 add_old_ivs_candidates (data);
4247 /* Add induction variables derived from uses.  */
4248 add_derived_ivs_candidates (data);
/* Mark original candidates that sit next to autoinc-capable uses.  */
4250 set_autoinc_for_original_candidates (data);
4252 /* Record the important candidates.  */
4253 record_important_candidates (data);
4256 /* Determines costs of basing the use of the iv on an iv candidate.
     Fills in the use/candidate cost map for every pair considered.  */
4258 static void
4259 determine_use_iv_costs (struct ivopts_data *data)
4261 unsigned i, j;
4262 struct iv_use *use;
4263 struct iv_cand *cand;
4264 bitmap to_clear = BITMAP_ALLOC (NULL);
4266 alloc_use_cost_map (data);
4268 for (i = 0; i < n_iv_uses (data); i++)
4270 use = iv_use (data, i);
/* With few enough candidates, cost every pair; otherwise only the
   candidates recorded as related to this use.  */
4272 if (data->consider_all_candidates)
4274 for (j = 0; j < n_iv_cands (data); j++)
4276 cand = iv_cand (data, j);
4277 determine_use_iv_cost (data, use, cand);
4280 else
4282 bitmap_iterator bi;
4284 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4286 cand = iv_cand (data, j);
4287 if (!determine_use_iv_cost (data, use, cand))
4288 bitmap_set_bit (to_clear, j);
4291 /* Remove the candidates for that the cost is infinite from
4292 the list of related candidates.  */
4293 bitmap_and_compl_into (use->related_cands, to_clear);
4294 bitmap_clear (to_clear);
4298 BITMAP_FREE (to_clear);
/* Dump the resulting cost table for debugging.  */
4300 if (dump_file && (dump_flags & TDF_DETAILS))
4302 fprintf (dump_file, "Use-candidate costs:\n");
4304 for (i = 0; i < n_iv_uses (data); i++)
4306 use = iv_use (data, i);
4308 fprintf (dump_file, "Use %d:\n", i);
4309 fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
4310 for (j = 0; j < use->n_map_members; j++)
4312 if (!use->cost_map[j].cand
4313 || infinite_cost_p (use->cost_map[j].cost))
4314 continue;
4316 fprintf (dump_file, "  %d\t%d\t%d\t",
4317 use->cost_map[j].cand->id,
4318 use->cost_map[j].cost.cost,
4319 use->cost_map[j].cost.complexity);
4320 if (use->cost_map[j].depends_on)
4321 bitmap_print (dump_file,
4322 use->cost_map[j].depends_on, "","");
4323 fprintf (dump_file, "\n");
4326 fprintf (dump_file, "\n");
4328 fprintf (dump_file, "\n");
4332 /* Determines cost of the candidate CAND and stores it in CAND->cost.  */
4334 static void
4335 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4337 comp_cost cost_base;
4338 unsigned cost, cost_step;
4339 tree base;
/* Pseudocandidates have no iv and cost nothing.  */
4341 if (!cand->iv)
4343 cand->cost = 0;
4344 return;
4347 /* There are two costs associated with the candidate -- its increment
4348 and its initialization.  The second is almost negligible for any loop
4349 that rolls enough, so we take it just very little into account.  */
4351 base = cand->iv->base;
4352 cost_base = force_var_cost (data, base, NULL);
4353 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
/* Amortize the one-time initialization over the loop iterations.  */
4355 cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
4357 /* Prefer the original ivs unless we may gain something by replacing it.
4358 The reason is to make debugging simpler; so this is not relevant for
4359 artificial ivs created by other optimization passes.  */
4360 if (cand->pos != IP_ORIGINAL
4361 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4362 cost++;
4364 /* Prefer not to insert statements into latch unless there are some
4365 already (so that we do not create unnecessary jumps).  */
4366 if (cand->pos == IP_END
4367 && empty_block_p (ip_end_pos (data->current_loop)))
4368 cost++;
4370 cand->cost = cost;
4371 cand->cost_step = cost_step;
4374 /* Determines costs of computation of the candidates.  */
4376 static void
4377 determine_iv_costs (struct ivopts_data *data)
4379 unsigned i;
4381 if (dump_file && (dump_flags & TDF_DETAILS))
4383 fprintf (dump_file, "Candidate costs:\n");
4384 fprintf (dump_file, "  cand\tcost\n");
/* Cost each candidate in turn, dumping the result when requested.  */
4387 for (i = 0; i < n_iv_cands (data); i++)
4389 struct iv_cand *cand = iv_cand (data, i);
4391 determine_iv_cost (data, cand);
4393 if (dump_file && (dump_flags & TDF_DETAILS))
4394 fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4397 if (dump_file && (dump_flags & TDF_DETAILS))
4398 fprintf (dump_file, "\n");
4401 /* Calculates cost for having SIZE induction variables.  */
4403 static unsigned
4404 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4406 /* We add size to the cost, so that we prefer eliminating ivs
4407 if possible.  */
4408 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
4411 /* For each size of the induction variable set determine the penalty.  */
4413 static void
4414 determine_set_costs (struct ivopts_data *data)
4416 unsigned j, n;
4417 gimple phi;
4418 gimple_stmt_iterator psi;
4419 tree op;
4420 struct loop *loop = data->current_loop;
4421 bitmap_iterator bi;
4423 /* We use the following model (definitely improvable, especially the
4424 cost function -- TODO):
4426 We estimate the number of registers available (using MD data), name it A.
4428 We estimate the number of registers used by the loop, name it U.  This
4429 number is obtained as the number of loop phi nodes (not counting virtual
4430 registers and bivs) + the number of variables from outside of the loop.
4432 We set a reserve R (free regs that are used for temporary computations,
4433 etc.).  For now the reserve is a constant 3.
4435 Let I be the number of induction variables.
4437 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4438 make a lot of ivs without a reason).
4439 -- if A - R < U + I <= A, the cost is I * PRES_COST
4440 -- if U + I > A, the cost is I * PRES_COST and
4441 number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4443 if (dump_file && (dump_flags & TDF_DETAILS))
4445 fprintf (dump_file, "Global costs:\n");
4446 fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4447 fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
4448 fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
/* Count registers used by the loop: non-virtual, non-biv phi results...  */
4451 n = 0;
4452 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4454 phi = gsi_stmt (psi);
4455 op = PHI_RESULT (phi);
4457 if (!is_gimple_reg (op))
4458 continue;
4460 if (get_iv (data, op))
4461 continue;
4463 n++;
/* ... plus invariants from outside the loop with nonlinear uses.  */
4466 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4468 struct version_info *info = ver_info (data, j);
4470 if (info->inv_id && info->has_nonlin_use)
4471 n++;
4474 data->regs_used = n;
4475 if (dump_file && (dump_flags & TDF_DETAILS))
4476 fprintf (dump_file, "  regs_used %d\n", n);
4478 if (dump_file && (dump_flags & TDF_DETAILS))
4480 fprintf (dump_file, "  cost for size:\n");
4481 fprintf (dump_file, "  ivs\tcost\n");
4482 for (j = 0; j <= 2 * target_avail_regs; j++)
4483 fprintf (dump_file, "  %d\t%d\n", j,
4484 ivopts_global_cost_for_size (data, j));
4485 fprintf (dump_file, "\n");
4489 /* Returns true if A is a cheaper cost pair than B.  A NULL pair counts
     as infinitely expensive.  */
4491 static bool
4492 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4494 int cmp;
4496 if (!a)
4497 return false;
4499 if (!b)
4500 return true;
4502 cmp = compare_costs (a->cost, b->cost);
4503 if (cmp < 0)
4504 return true;
4506 if (cmp > 0)
4507 return false;
4509 /* In case the costs are the same, prefer the cheaper candidate.  */
4510 if (a->cand->cost < b->cand->cost)
4511 return true;
4513 return false;
4516 /* Computes the cost field of IVS structure: sum of per-use costs,
     candidate costs and the register-pressure penalty.  */
4518 static void
4519 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4521 comp_cost cost = ivs->cand_use_cost;
4522 cost.cost += ivs->cand_cost;
4523 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4525 ivs->cost = cost;
4528 /* Remove invariants in set INVS to set IVS.  */
4530 static void
4531 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4533 bitmap_iterator bi;
4534 unsigned iid;
4536 if (!invs)
4537 return;
/* Decrement each invariant's use count; when it drops to zero the
   invariant no longer occupies a register.  */
4539 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4541 ivs->n_invariant_uses[iid]--;
4542 if (ivs->n_invariant_uses[iid] == 0)
4543 ivs->n_regs--;
4547 /* Set USE not to be expressed by any candidate in IVS.  */
4549 static void
4550 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4551 struct iv_use *use)
4553 unsigned uid = use->id, cid;
4554 struct cost_pair *cp;
4556 cp = ivs->cand_for_use[uid];
4557 if (!cp)
4558 return;
4559 cid = cp->cand->id;
4561 ivs->bad_uses++;
4562 ivs->cand_for_use[uid] = NULL;
4563 ivs->n_cand_uses[cid]--;
/* If this was the last use of the candidate, remove the candidate from
   the set and give back its register and cost.  */
4565 if (ivs->n_cand_uses[cid] == 0)
4567 bitmap_clear_bit (ivs->cands, cid);
4568 /* Do not count the pseudocandidates.  */
4569 if (cp->cand->iv)
4570 ivs->n_regs--;
4571 ivs->n_cands--;
4572 ivs->cand_cost -= cp->cand->cost;
4574 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4577 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4579 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4580 iv_ca_recount_cost (data, ivs);
4583 /* Add invariants in set INVS to set IVS.  */
4585 static void
4586 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4588 bitmap_iterator bi;
4589 unsigned iid;
4591 if (!invs)
4592 return;
/* Increment each invariant's use count; the first use claims a
   register.  */
4594 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4596 ivs->n_invariant_uses[iid]++;
4597 if (ivs->n_invariant_uses[iid] == 1)
4598 ivs->n_regs++;
4602 /* Set cost pair for USE in set IVS to CP.  */
4604 static void
4605 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4606 struct iv_use *use, struct cost_pair *cp)
4608 unsigned uid = use->id, cid;
4610 if (ivs->cand_for_use[uid] == cp)
4611 return;
/* Detach any previous assignment first.  */
4613 if (ivs->cand_for_use[uid])
4614 iv_ca_set_no_cp (data, ivs, use);
4616 if (cp)
4618 cid = cp->cand->id;
4620 ivs->bad_uses--;
4621 ivs->cand_for_use[uid] = cp;
4622 ivs->n_cand_uses[cid]++;
/* The first use of a candidate adds it to the set, with its register
   and cost.  */
4623 if (ivs->n_cand_uses[cid] == 1)
4625 bitmap_set_bit (ivs->cands, cid);
4626 /* Do not count the pseudocandidates.  */
4627 if (cp->cand->iv)
4628 ivs->n_regs++;
4629 ivs->n_cands++;
4630 ivs->cand_cost += cp->cand->cost;
4632 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4635 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4636 iv_ca_set_add_invariants (ivs, cp->depends_on);
4637 iv_ca_recount_cost (data, ivs);
4641 /* Extend set IVS by expressing USE by some of the candidates in it
4642    if possible.  */
4644 static void
4645 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4646 struct iv_use *use)
4648 struct cost_pair *best_cp = NULL, *cp;
4649 bitmap_iterator bi;
4650 unsigned i;
/* Uses must be added in order of their ids.  */
4652 gcc_assert (ivs->upto >= use->id);
4654 if (ivs->upto == use->id)
4656 ivs->upto++;
4657 ivs->bad_uses++;
/* Pick the cheapest cost pair among the candidates already in IVS.  */
4660 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4662 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4664 if (cheaper_cost_pair (cp, best_cp))
4665 best_cp = cp;
4668 iv_ca_set_cp (data, ivs, use, best_cp);
4671 /* Get cost for assignment IVS; infinite while some use is still
     unexpressed.  */
4673 static comp_cost
4674 iv_ca_cost (struct iv_ca *ivs)
4676 /* This was a conditional expression but it triggered a bug in
4677 Sun C 5.5.  */
4678 if (ivs->bad_uses)
4679 return infinite_cost;
4680 else
4681 return ivs->cost;
4684 /* Returns true if all dependences of CP are among invariants in IVS.  */
4686 static bool
4687 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4689 unsigned i;
4690 bitmap_iterator bi;
4692 if (!cp->depends_on)
4693 return true;
/* Every invariant CP depends on must already have a nonzero use count
   in IVS.  */
4695 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4697 if (ivs->n_invariant_uses[i] == 0)
4698 return false;
4701 return true;
4704 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4705    it before NEXT_CHANGE.  Returns the new head of the change list;
     the caller owns the allocated node (freed via iv_ca_delta_free).  */
4707 static struct iv_ca_delta *
4708 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4709 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4711 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4713 change->use = use;
4714 change->old_cp = old_cp;
4715 change->new_cp = new_cp;
4716 change->next_change = next_change;
4718 return change;
4721 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4722    are rewritten.  */
4724 static struct iv_ca_delta *
4725 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4727 struct iv_ca_delta *last;
4729 if (!l2)
4730 return l1;
4732 if (!l1)
4733 return l2;
/* Append L2 after the last node of L1.  */
4735 for (last = l1; last->next_change; last = last->next_change)
4736 continue;
4737 last->next_change = l2;
4739 return l1;
4742 /* Returns candidate by that USE is expressed in IVS (NULL if none).  */
4744 static struct cost_pair *
4745 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4747 return ivs->cand_for_use[use->id];
4750 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4752 static struct iv_ca_delta *
4753 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4755 struct iv_ca_delta *act, *next, *prev = NULL;
4756 struct cost_pair *tmp;
4758 for (act = delta; act; act = next)
/* Reverse the list links...  */
4760 next = act->next_change;
4761 act->next_change = prev;
4762 prev = act;
/* ... and swap old/new in each node so the list undoes the changes.  */
4764 tmp = act->old_cp;
4765 act->old_cp = act->new_cp;
4766 act->new_cp = tmp;
4769 return prev;
4772 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4773 reverted instead. */
4775 static void
4776 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4777 struct iv_ca_delta *delta, bool forward)
4779 struct cost_pair *from, *to;
4780 struct iv_ca_delta *act;
4782 if (!forward)
4783 delta = iv_ca_delta_reverse (delta);
4785 for (act = delta; act; act = act->next_change)
4787 from = act->old_cp;
4788 to = act->new_cp;
4789 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4790 iv_ca_set_cp (data, ivs, act->use, to);
4793 if (!forward)
4794 iv_ca_delta_reverse (delta);
4797 /* Returns true if CAND is used in IVS. */
4799 static bool
4800 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4802 return ivs->n_cand_uses[cand->id] > 0;
4805 /* Returns number of induction variable candidates in the set IVS. */
4807 static unsigned
4808 iv_ca_n_cands (struct iv_ca *ivs)
4810 return ivs->n_cands;
4813 /* Free the list of changes DELTA. */
4815 static void
4816 iv_ca_delta_free (struct iv_ca_delta **delta)
4818 struct iv_ca_delta *act, *next;
4820 for (act = *delta; act; act = next)
4822 next = act->next_change;
4823 free (act);
4826 *delta = NULL;
4829 /* Allocates new iv candidates assignment. */
4831 static struct iv_ca *
4832 iv_ca_new (struct ivopts_data *data)
4834 struct iv_ca *nw = XNEW (struct iv_ca);
4836 nw->upto = 0;
4837 nw->bad_uses = 0;
4838 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4839 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4840 nw->cands = BITMAP_ALLOC (NULL);
4841 nw->n_cands = 0;
4842 nw->n_regs = 0;
4843 nw->cand_use_cost = zero_cost;
4844 nw->cand_cost = 0;
4845 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4846 nw->cost = zero_cost;
4848 return nw;
4851 /* Free memory occupied by the set IVS. */
4853 static void
4854 iv_ca_free (struct iv_ca **ivs)
4856 free ((*ivs)->cand_for_use);
4857 free ((*ivs)->n_cand_uses);
4858 BITMAP_FREE ((*ivs)->cands);
4859 free ((*ivs)->n_invariant_uses);
4860 free (*ivs);
4861 *ivs = NULL;
4864 /* Dumps IVS to FILE. */
4866 static void
4867 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4869 const char *pref = " invariants ";
4870 unsigned i;
4871 comp_cost cost = iv_ca_cost (ivs);
4873 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
4874 bitmap_print (file, ivs->cands, " candidates ","\n");
4876 for (i = 1; i <= data->max_inv_id; i++)
4877 if (ivs->n_invariant_uses[i])
4879 fprintf (file, "%s%d", pref, i);
4880 pref = ", ";
4882 fprintf (file, "\n");
4885 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4886 new set, and store differences in DELTA. Number of induction variables
4887 in the new set is stored to N_IVS. */
4889 static comp_cost
4890 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4891 struct iv_cand *cand, struct iv_ca_delta **delta,
4892 unsigned *n_ivs)
4894 unsigned i;
4895 comp_cost cost;
4896 struct iv_use *use;
4897 struct cost_pair *old_cp, *new_cp;
4899 *delta = NULL;
4900 for (i = 0; i < ivs->upto; i++)
4902 use = iv_use (data, i);
4903 old_cp = iv_ca_cand_for_use (ivs, use);
4905 if (old_cp
4906 && old_cp->cand == cand)
4907 continue;
4909 new_cp = get_use_iv_cost (data, use, cand);
4910 if (!new_cp)
4911 continue;
4913 if (!iv_ca_has_deps (ivs, new_cp))
4914 continue;
4916 if (!cheaper_cost_pair (new_cp, old_cp))
4917 continue;
4919 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4922 iv_ca_delta_commit (data, ivs, *delta, true);
4923 cost = iv_ca_cost (ivs);
4924 if (n_ivs)
4925 *n_ivs = iv_ca_n_cands (ivs);
4926 iv_ca_delta_commit (data, ivs, *delta, false);
4928 return cost;
4931 /* Try narrowing set IVS by removing CAND. Return the cost of
4932 the new set and store the differences in DELTA. */
4934 static comp_cost
4935 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4936 struct iv_cand *cand, struct iv_ca_delta **delta)
4938 unsigned i, ci;
4939 struct iv_use *use;
4940 struct cost_pair *old_cp, *new_cp, *cp;
4941 bitmap_iterator bi;
4942 struct iv_cand *cnd;
4943 comp_cost cost;
4945 *delta = NULL;
4946 for (i = 0; i < n_iv_uses (data); i++)
4948 use = iv_use (data, i);
4950 old_cp = iv_ca_cand_for_use (ivs, use);
4951 if (old_cp->cand != cand)
4952 continue;
4954 new_cp = NULL;
4956 if (data->consider_all_candidates)
4958 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4960 if (ci == cand->id)
4961 continue;
4963 cnd = iv_cand (data, ci);
4965 cp = get_use_iv_cost (data, use, cnd);
4966 if (!cp)
4967 continue;
4968 if (!iv_ca_has_deps (ivs, cp))
4969 continue;
4971 if (!cheaper_cost_pair (cp, new_cp))
4972 continue;
4974 new_cp = cp;
4977 else
4979 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4981 if (ci == cand->id)
4982 continue;
4984 cnd = iv_cand (data, ci);
4986 cp = get_use_iv_cost (data, use, cnd);
4987 if (!cp)
4988 continue;
4989 if (!iv_ca_has_deps (ivs, cp))
4990 continue;
4992 if (!cheaper_cost_pair (cp, new_cp))
4993 continue;
4995 new_cp = cp;
4999 if (!new_cp)
5001 iv_ca_delta_free (delta);
5002 return infinite_cost;
5005 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5008 iv_ca_delta_commit (data, ivs, *delta, true);
5009 cost = iv_ca_cost (ivs);
5010 iv_ca_delta_commit (data, ivs, *delta, false);
5012 return cost;
5015 /* Try optimizing the set of candidates IVS by removing candidates different
5016 from to EXCEPT_CAND from it. Return cost of the new set, and store
5017 differences in DELTA. */
5019 static comp_cost
5020 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5021 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5023 bitmap_iterator bi;
5024 struct iv_ca_delta *act_delta, *best_delta;
5025 unsigned i;
5026 comp_cost best_cost, acost;
5027 struct iv_cand *cand;
5029 best_delta = NULL;
5030 best_cost = iv_ca_cost (ivs);
5032 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5034 cand = iv_cand (data, i);
5036 if (cand == except_cand)
5037 continue;
5039 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5041 if (compare_costs (acost, best_cost) < 0)
5043 best_cost = acost;
5044 iv_ca_delta_free (&best_delta);
5045 best_delta = act_delta;
5047 else
5048 iv_ca_delta_free (&act_delta);
5051 if (!best_delta)
5053 *delta = NULL;
5054 return best_cost;
5057 /* Recurse to possibly remove other unnecessary ivs. */
5058 iv_ca_delta_commit (data, ivs, best_delta, true);
5059 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5060 iv_ca_delta_commit (data, ivs, best_delta, false);
5061 *delta = iv_ca_delta_join (best_delta, *delta);
5062 return best_cost;
5065 /* Tries to extend the sets IVS in the best possible way in order
5066 to express the USE. */
5068 static bool
5069 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5070 struct iv_use *use)
5072 comp_cost best_cost, act_cost;
5073 unsigned i;
5074 bitmap_iterator bi;
5075 struct iv_cand *cand;
5076 struct iv_ca_delta *best_delta = NULL, *act_delta;
5077 struct cost_pair *cp;
5079 iv_ca_add_use (data, ivs, use);
5080 best_cost = iv_ca_cost (ivs);
5082 cp = iv_ca_cand_for_use (ivs, use);
5083 if (cp)
5085 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5086 iv_ca_set_no_cp (data, ivs, use);
5089 /* First try important candidates not based on any memory object. Only if
5090 this fails, try the specific ones. Rationale -- in loops with many
5091 variables the best choice often is to use just one generic biv. If we
5092 added here many ivs specific to the uses, the optimization algorithm later
5093 would be likely to get stuck in a local minimum, thus causing us to create
5094 too many ivs. The approach from few ivs to more seems more likely to be
5095 successful -- starting from few ivs, replacing an expensive use by a
5096 specific iv should always be a win. */
5097 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5099 cand = iv_cand (data, i);
5101 if (cand->iv->base_object != NULL_TREE)
5102 continue;
5104 if (iv_ca_cand_used_p (ivs, cand))
5105 continue;
5107 cp = get_use_iv_cost (data, use, cand);
5108 if (!cp)
5109 continue;
5111 iv_ca_set_cp (data, ivs, use, cp);
5112 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5113 iv_ca_set_no_cp (data, ivs, use);
5114 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5116 if (compare_costs (act_cost, best_cost) < 0)
5118 best_cost = act_cost;
5120 iv_ca_delta_free (&best_delta);
5121 best_delta = act_delta;
5123 else
5124 iv_ca_delta_free (&act_delta);
5127 if (infinite_cost_p (best_cost))
5129 for (i = 0; i < use->n_map_members; i++)
5131 cp = use->cost_map + i;
5132 cand = cp->cand;
5133 if (!cand)
5134 continue;
5136 /* Already tried this. */
5137 if (cand->important && cand->iv->base_object == NULL_TREE)
5138 continue;
5140 if (iv_ca_cand_used_p (ivs, cand))
5141 continue;
5143 act_delta = NULL;
5144 iv_ca_set_cp (data, ivs, use, cp);
5145 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5146 iv_ca_set_no_cp (data, ivs, use);
5147 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5148 cp, act_delta);
5150 if (compare_costs (act_cost, best_cost) < 0)
5152 best_cost = act_cost;
5154 if (best_delta)
5155 iv_ca_delta_free (&best_delta);
5156 best_delta = act_delta;
5158 else
5159 iv_ca_delta_free (&act_delta);
5163 iv_ca_delta_commit (data, ivs, best_delta, true);
5164 iv_ca_delta_free (&best_delta);
5166 return !infinite_cost_p (best_cost);
5169 /* Finds an initial assignment of candidates to uses. */
5171 static struct iv_ca *
5172 get_initial_solution (struct ivopts_data *data)
5174 struct iv_ca *ivs = iv_ca_new (data);
5175 unsigned i;
5177 for (i = 0; i < n_iv_uses (data); i++)
5178 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5180 iv_ca_free (&ivs);
5181 return NULL;
5184 return ivs;
5187 /* Tries to improve set of induction variables IVS. */
5189 static bool
5190 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5192 unsigned i, n_ivs;
5193 comp_cost acost, best_cost = iv_ca_cost (ivs);
5194 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5195 struct iv_cand *cand;
5197 /* Try extending the set of induction variables by one. */
5198 for (i = 0; i < n_iv_cands (data); i++)
5200 cand = iv_cand (data, i);
5202 if (iv_ca_cand_used_p (ivs, cand))
5203 continue;
5205 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5206 if (!act_delta)
5207 continue;
5209 /* If we successfully added the candidate and the set is small enough,
5210 try optimizing it by removing other candidates. */
5211 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5213 iv_ca_delta_commit (data, ivs, act_delta, true);
5214 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5215 iv_ca_delta_commit (data, ivs, act_delta, false);
5216 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5219 if (compare_costs (acost, best_cost) < 0)
5221 best_cost = acost;
5222 iv_ca_delta_free (&best_delta);
5223 best_delta = act_delta;
5225 else
5226 iv_ca_delta_free (&act_delta);
5229 if (!best_delta)
5231 /* Try removing the candidates from the set instead. */
5232 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5234 /* Nothing more we can do. */
5235 if (!best_delta)
5236 return false;
5239 iv_ca_delta_commit (data, ivs, best_delta, true);
5240 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5241 iv_ca_delta_free (&best_delta);
5242 return true;
5245 /* Attempts to find the optimal set of induction variables. We do simple
5246 greedy heuristic -- we try to replace at most one candidate in the selected
5247 solution and remove the unused ivs while this improves the cost. */
5249 static struct iv_ca *
5250 find_optimal_iv_set (struct ivopts_data *data)
5252 unsigned i;
5253 struct iv_ca *set;
5254 struct iv_use *use;
5256 /* Get the initial solution. */
5257 set = get_initial_solution (data);
5258 if (!set)
5260 if (dump_file && (dump_flags & TDF_DETAILS))
5261 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5262 return NULL;
5265 if (dump_file && (dump_flags & TDF_DETAILS))
5267 fprintf (dump_file, "Initial set of candidates:\n");
5268 iv_ca_dump (data, dump_file, set);
5271 while (try_improve_iv_set (data, set))
5273 if (dump_file && (dump_flags & TDF_DETAILS))
5275 fprintf (dump_file, "Improved to:\n");
5276 iv_ca_dump (data, dump_file, set);
5280 if (dump_file && (dump_flags & TDF_DETAILS))
5282 comp_cost cost = iv_ca_cost (set);
5283 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
5286 for (i = 0; i < n_iv_uses (data); i++)
5288 use = iv_use (data, i);
5289 use->selected = iv_ca_cand_for_use (set, use)->cand;
5292 return set;
5295 /* Creates a new induction variable corresponding to CAND. */
5297 static void
5298 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5300 gimple_stmt_iterator incr_pos;
5301 tree base;
5302 bool after = false;
5304 if (!cand->iv)
5305 return;
5307 switch (cand->pos)
5309 case IP_NORMAL:
5310 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5311 break;
5313 case IP_END:
5314 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5315 after = true;
5316 break;
5318 case IP_AFTER_USE:
5319 after = true;
5320 /* fall through */
5321 case IP_BEFORE_USE:
5322 incr_pos = gsi_for_stmt (cand->incremented_at);
5323 break;
5325 case IP_ORIGINAL:
5326 /* Mark that the iv is preserved. */
5327 name_info (data, cand->var_before)->preserve_biv = true;
5328 name_info (data, cand->var_after)->preserve_biv = true;
5330 /* Rewrite the increment so that it uses var_before directly. */
5331 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5333 return;
5336 gimple_add_tmp_var (cand->var_before);
5337 add_referenced_var (cand->var_before);
5339 base = unshare_expr (cand->iv->base);
5341 create_iv (base, unshare_expr (cand->iv->step),
5342 cand->var_before, data->current_loop,
5343 &incr_pos, after, &cand->var_before, &cand->var_after);
5346 /* Creates new induction variables described in SET. */
5348 static void
5349 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5351 unsigned i;
5352 struct iv_cand *cand;
5353 bitmap_iterator bi;
5355 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5357 cand = iv_cand (data, i);
5358 create_new_iv (data, cand);
5363 /* Rewrites USE (definition of iv used in a nonlinear expression)
5364 using candidate CAND. */
5366 static void
5367 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5368 struct iv_use *use, struct iv_cand *cand)
5370 tree comp;
5371 tree op, tgt;
5372 gimple ass;
5373 gimple_stmt_iterator bsi;
5375 /* An important special case -- if we are asked to express value of
5376 the original iv by itself, just exit; there is no need to
5377 introduce a new computation (that might also need casting the
5378 variable to unsigned and back). */
5379 if (cand->pos == IP_ORIGINAL
5380 && cand->incremented_at == use->stmt)
5382 tree step, ctype, utype;
5383 enum tree_code incr_code = PLUS_EXPR, old_code;
5385 gcc_assert (is_gimple_assign (use->stmt));
5386 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5388 step = cand->iv->step;
5389 ctype = TREE_TYPE (step);
5390 utype = TREE_TYPE (cand->var_after);
5391 if (TREE_CODE (step) == NEGATE_EXPR)
5393 incr_code = MINUS_EXPR;
5394 step = TREE_OPERAND (step, 0);
5397 /* Check whether we may leave the computation unchanged.
5398 This is the case only if it does not rely on other
5399 computations in the loop -- otherwise, the computation
5400 we rely upon may be removed in remove_unused_ivs,
5401 thus leading to ICE. */
5402 old_code = gimple_assign_rhs_code (use->stmt);
5403 if (old_code == PLUS_EXPR
5404 || old_code == MINUS_EXPR
5405 || old_code == POINTER_PLUS_EXPR)
5407 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5408 op = gimple_assign_rhs2 (use->stmt);
5409 else if (old_code != MINUS_EXPR
5410 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5411 op = gimple_assign_rhs1 (use->stmt);
5412 else
5413 op = NULL_TREE;
5415 else
5416 op = NULL_TREE;
5418 if (op
5419 && (TREE_CODE (op) == INTEGER_CST
5420 || operand_equal_p (op, step, 0)))
5421 return;
5423 /* Otherwise, add the necessary computations to express
5424 the iv. */
5425 op = fold_convert (ctype, cand->var_before);
5426 comp = fold_convert (utype,
5427 build2 (incr_code, ctype, op,
5428 unshare_expr (step)));
5430 else
5432 comp = get_computation (data->current_loop, use, cand);
5433 gcc_assert (comp != NULL_TREE);
5436 switch (gimple_code (use->stmt))
5438 case GIMPLE_PHI:
5439 tgt = PHI_RESULT (use->stmt);
5441 /* If we should keep the biv, do not replace it. */
5442 if (name_info (data, tgt)->preserve_biv)
5443 return;
5445 bsi = gsi_after_labels (gimple_bb (use->stmt));
5446 break;
5448 case GIMPLE_ASSIGN:
5449 tgt = gimple_assign_lhs (use->stmt);
5450 bsi = gsi_for_stmt (use->stmt);
5451 break;
5453 default:
5454 gcc_unreachable ();
5457 op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5458 true, GSI_SAME_STMT);
5460 if (gimple_code (use->stmt) == GIMPLE_PHI)
5462 ass = gimple_build_assign (tgt, op);
5463 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5465 bsi = gsi_for_stmt (use->stmt);
5466 remove_phi_node (&bsi, false);
5468 else
5470 gimple_assign_set_rhs_from_tree (&bsi, op);
5471 use->stmt = gsi_stmt (bsi);
5475 /* Replaces ssa name in index IDX by its basic variable. Callback for
5476 for_each_index. */
5478 static bool
5479 idx_remove_ssa_names (tree base, tree *idx,
5480 void *data ATTRIBUTE_UNUSED)
5482 tree *op;
5484 if (TREE_CODE (*idx) == SSA_NAME)
5485 *idx = SSA_NAME_VAR (*idx);
5487 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
5489 op = &TREE_OPERAND (base, 2);
5490 if (*op
5491 && TREE_CODE (*op) == SSA_NAME)
5492 *op = SSA_NAME_VAR (*op);
5493 op = &TREE_OPERAND (base, 3);
5494 if (*op
5495 && TREE_CODE (*op) == SSA_NAME)
5496 *op = SSA_NAME_VAR (*op);
5499 return true;
5502 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5504 static tree
5505 unshare_and_remove_ssa_names (tree ref)
5507 ref = unshare_expr (ref);
5508 for_each_index (&ref, idx_remove_ssa_names, NULL);
5510 return ref;
5513 /* Copies the reference information from OLD_REF to NEW_REF. */
5515 static void
5516 copy_ref_info (tree new_ref, tree old_ref)
5518 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5519 copy_mem_ref_info (new_ref, old_ref);
5520 else
5522 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5523 TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
5524 TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
5528 /* Rewrites USE (address that is an iv) using candidate CAND. */
5530 static void
5531 rewrite_use_address (struct ivopts_data *data,
5532 struct iv_use *use, struct iv_cand *cand)
5534 aff_tree aff;
5535 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5536 tree base_hint = NULL_TREE;
5537 tree ref;
5538 bool ok;
5540 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5541 gcc_assert (ok);
5542 unshare_aff_combination (&aff);
5544 /* To avoid undefined overflow problems, all IV candidates use unsigned
5545 integer types. The drawback is that this makes it impossible for
5546 create_mem_ref to distinguish an IV that is based on a memory object
5547 from one that represents simply an offset.
5549 To work around this problem, we pass a hint to create_mem_ref that
5550 indicates which variable (if any) in aff is an IV based on a memory
5551 object. Note that we only consider the candidate. If this is not
5552 based on an object, the base of the reference is in some subexpression
5553 of the use -- but these will use pointer types, so they are recognized
5554 by the create_mem_ref heuristics anyway. */
5555 if (cand->iv->base_object)
5556 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
5558 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, base_hint,
5559 data->speed);
5560 copy_ref_info (ref, *use->op_p);
5561 *use->op_p = ref;
5564 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5565 candidate CAND. */
5567 static void
5568 rewrite_use_compare (struct ivopts_data *data,
5569 struct iv_use *use, struct iv_cand *cand)
5571 tree comp, *var_p, op, bound;
5572 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5573 enum tree_code compare;
5574 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5575 bool ok;
5577 bound = cp->value;
5578 if (bound)
5580 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5581 tree var_type = TREE_TYPE (var);
5582 gimple_seq stmts;
5584 compare = iv_elimination_compare (data, use);
5585 bound = unshare_expr (fold_convert (var_type, bound));
5586 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
5587 if (stmts)
5588 gsi_insert_seq_on_edge_immediate (
5589 loop_preheader_edge (data->current_loop),
5590 stmts);
5592 gimple_cond_set_lhs (use->stmt, var);
5593 gimple_cond_set_code (use->stmt, compare);
5594 gimple_cond_set_rhs (use->stmt, op);
5595 return;
5598 /* The induction variable elimination failed; just express the original
5599 giv. */
5600 comp = get_computation (data->current_loop, use, cand);
5601 gcc_assert (comp != NULL_TREE);
5603 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
5604 gcc_assert (ok);
5606 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5607 true, GSI_SAME_STMT);
5610 /* Rewrites USE using candidate CAND. */
5612 static void
5613 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5615 switch (use->type)
5617 case USE_NONLINEAR_EXPR:
5618 rewrite_use_nonlinear_expr (data, use, cand);
5619 break;
5621 case USE_ADDRESS:
5622 rewrite_use_address (data, use, cand);
5623 break;
5625 case USE_COMPARE:
5626 rewrite_use_compare (data, use, cand);
5627 break;
5629 default:
5630 gcc_unreachable ();
5633 update_stmt (use->stmt);
5636 /* Rewrite the uses using the selected induction variables. */
5638 static void
5639 rewrite_uses (struct ivopts_data *data)
5641 unsigned i;
5642 struct iv_cand *cand;
5643 struct iv_use *use;
5645 for (i = 0; i < n_iv_uses (data); i++)
5647 use = iv_use (data, i);
5648 cand = use->selected;
5649 gcc_assert (cand);
5651 rewrite_use (data, use, cand);
5655 /* Removes the ivs that are not used after rewriting. */
5657 static void
5658 remove_unused_ivs (struct ivopts_data *data)
5660 unsigned j;
5661 bitmap_iterator bi;
5662 bitmap toremove = BITMAP_ALLOC (NULL);
5664 /* Figure out an order in which to release SSA DEFs so that we don't
5665 release something that we'd have to propagate into a debug stmt
5666 afterwards. */
5667 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5669 struct version_info *info;
5671 info = ver_info (data, j);
5672 if (info->iv
5673 && !integer_zerop (info->iv->step)
5674 && !info->inv_id
5675 && !info->iv->have_use_for
5676 && !info->preserve_biv)
5677 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
5680 release_defs_bitset (toremove);
5682 BITMAP_FREE (toremove);
5685 /* Frees data allocated by the optimization of a single loop. */
5687 static void
5688 free_loop_data (struct ivopts_data *data)
5690 unsigned i, j;
5691 bitmap_iterator bi;
5692 tree obj;
5694 if (data->niters)
5696 pointer_map_destroy (data->niters);
5697 data->niters = NULL;
5700 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5702 struct version_info *info;
5704 info = ver_info (data, i);
5705 if (info->iv)
5706 free (info->iv);
5707 info->iv = NULL;
5708 info->has_nonlin_use = false;
5709 info->preserve_biv = false;
5710 info->inv_id = 0;
5712 bitmap_clear (data->relevant);
5713 bitmap_clear (data->important_candidates);
5715 for (i = 0; i < n_iv_uses (data); i++)
5717 struct iv_use *use = iv_use (data, i);
5719 free (use->iv);
5720 BITMAP_FREE (use->related_cands);
5721 for (j = 0; j < use->n_map_members; j++)
5722 if (use->cost_map[j].depends_on)
5723 BITMAP_FREE (use->cost_map[j].depends_on);
5724 free (use->cost_map);
5725 free (use);
5727 VEC_truncate (iv_use_p, data->iv_uses, 0);
5729 for (i = 0; i < n_iv_cands (data); i++)
5731 struct iv_cand *cand = iv_cand (data, i);
5733 if (cand->iv)
5734 free (cand->iv);
5735 if (cand->depends_on)
5736 BITMAP_FREE (cand->depends_on);
5737 free (cand);
5739 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5741 if (data->version_info_size < num_ssa_names)
5743 data->version_info_size = 2 * num_ssa_names;
5744 free (data->version_info);
5745 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5748 data->max_inv_id = 0;
5750 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5751 SET_DECL_RTL (obj, NULL_RTX);
5753 VEC_truncate (tree, decl_rtl_to_reset, 0);
5756 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5757 loop tree. */
5759 static void
5760 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5762 free_loop_data (data);
5763 free (data->version_info);
5764 BITMAP_FREE (data->relevant);
5765 BITMAP_FREE (data->important_candidates);
5767 VEC_free (tree, heap, decl_rtl_to_reset);
5768 VEC_free (iv_use_p, heap, data->iv_uses);
5769 VEC_free (iv_cand_p, heap, data->iv_candidates);
5772 /* Optimizes the LOOP. Returns true if anything changed. */
5774 static bool
5775 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5777 bool changed = false;
5778 struct iv_ca *iv_ca;
5779 edge exit;
5780 basic_block *body;
5782 gcc_assert (!data->niters);
5783 data->current_loop = loop;
5784 data->speed = optimize_loop_for_speed_p (loop);
5786 if (dump_file && (dump_flags & TDF_DETAILS))
5788 fprintf (dump_file, "Processing loop %d\n", loop->num);
5790 exit = single_dom_exit (loop);
5791 if (exit)
5793 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5794 exit->src->index, exit->dest->index);
5795 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
5796 fprintf (dump_file, "\n");
5799 fprintf (dump_file, "\n");
5802 body = get_loop_body (loop);
5803 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
5804 free (body);
5806 /* For each ssa name determines whether it behaves as an induction variable
5807 in some loop. */
5808 if (!find_induction_variables (data))
5809 goto finish;
5811 /* Finds interesting uses (item 1). */
5812 find_interesting_uses (data);
5813 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5814 goto finish;
5816 /* Finds candidates for the induction variables (item 2). */
5817 find_iv_candidates (data);
5819 /* Calculates the costs (item 3, part 1). */
5820 determine_iv_costs (data);
5821 determine_use_iv_costs (data);
5822 determine_set_costs (data);
5824 /* Find the optimal set of induction variables (item 3, part 2). */
5825 iv_ca = find_optimal_iv_set (data);
5826 if (!iv_ca)
5827 goto finish;
5828 changed = true;
5830 /* Create the new induction variables (item 4, part 1). */
5831 create_new_ivs (data, iv_ca);
5832 iv_ca_free (&iv_ca);
5834 /* Rewrite the uses (item 4, part 2). */
5835 rewrite_uses (data);
5837 /* Remove the ivs that are unused after rewriting. */
5838 remove_unused_ivs (data);
5840 /* We have changed the structure of induction variables; it might happen
5841 that definitions in the scev database refer to some of them that were
5842 eliminated. */
5843 scev_reset ();
5845 finish:
5846 free_loop_data (data);
5848 return changed;
5851 /* Main entry point. Optimizes induction variables in loops. */
5853 void
5854 tree_ssa_iv_optimize (void)
5856 struct loop *loop;
5857 struct ivopts_data data;
5858 loop_iterator li;
5860 tree_ssa_iv_optimize_init (&data);
5862 /* Optimize the loops starting with the innermost ones. */
5863 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5865 if (dump_file && (dump_flags & TDF_DETAILS))
5866 flow_loop_dump (loop, dump_file, NULL, 1);
5868 tree_ssa_iv_optimize_loop (&data, loop);
5871 tree_ssa_iv_optimize_finalize (&data);