/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It proceeds in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (with respect to a cost function) set of variables is
      chosen.  The cost function assigns a cost to sets of induction
      variables and consists of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
	 induction variable in the set and adds its cost to the sum.  The
	 cost reflects the time spent on modifying the induction variable's
	 value to be usable for the given purpose (adding base and offset
	 for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

   All the costs are defined in a machine-specific way, using the target
   hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and enable us to decide costs
   more precisely, but getting all the interactions right would be
   complicated.  */
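
/* For illustration, a sketch of the overall effect (the exact outcome
   depends on the target costs, so this is only an example of the
   transformations described above).  Given a loop like

     for (i = 0; i < n; i++)
       a[i] = 0;

   the address a[i] and the comparison i < n are the interesting uses, and
   the pass may strength-reduce the array access to a pointer induction
   variable and eliminate i altogether, yielding roughly

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;  */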
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "output.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs wrto the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
341 /* Number of uses recorded in DATA. */
343 static inline unsigned
344 n_iv_uses (struct ivopts_data *data)
346 return VEC_length (iv_use_p, data->iv_uses);
349 /* Ith use recorded in DATA. */
351 static inline struct iv_use *
352 iv_use (struct ivopts_data *data, unsigned i)
354 return VEC_index (iv_use_p, data->iv_uses, i);
357 /* Number of candidates recorded in DATA. */
359 static inline unsigned
360 n_iv_cands (struct ivopts_data *data)
362 return VEC_length (iv_cand_p, data->iv_candidates);
365 /* Ith candidate recorded in DATA. */
367 static inline struct iv_cand *
368 iv_cand (struct ivopts_data *data, unsigned i)
370 return VEC_index (iv_cand_p, data->iv_candidates, i);
373 /* The single loop exit if it dominates the latch, NULL otherwise. */
375 edge
376 single_dom_exit (struct loop *loop)
378 edge exit = single_exit (loop);
380 if (!exit)
381 return NULL;
383 if (!just_once_each_iteration_p (loop, exit->src))
384 return NULL;
386 return exit;
389 /* Dumps information about the induction variable IV to FILE. */
391 extern void dump_iv (FILE *, struct iv *);
392 void
393 dump_iv (FILE *file, struct iv *iv)
395 if (iv->ssa_name)
397 fprintf (file, "ssa name ");
398 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
399 fprintf (file, "\n");
402 fprintf (file, " type ");
403 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
404 fprintf (file, "\n");
406 if (iv->step)
408 fprintf (file, " base ");
409 print_generic_expr (file, iv->base, TDF_SLIM);
410 fprintf (file, "\n");
412 fprintf (file, " step ");
413 print_generic_expr (file, iv->step, TDF_SLIM);
414 fprintf (file, "\n");
416 else
418 fprintf (file, " invariant ");
419 print_generic_expr (file, iv->base, TDF_SLIM);
420 fprintf (file, "\n");
423 if (iv->base_object)
425 fprintf (file, " base object ");
426 print_generic_expr (file, iv->base_object, TDF_SLIM);
427 fprintf (file, "\n");
430 if (iv->biv_p)
431 fprintf (file, " is a biv\n");
434 /* Dumps information about the USE to FILE. */
436 extern void dump_use (FILE *, struct iv_use *);
437 void
438 dump_use (FILE *file, struct iv_use *use)
440 fprintf (file, "use %d\n", use->id);
442 switch (use->type)
444 case USE_NONLINEAR_EXPR:
445 fprintf (file, " generic\n");
446 break;
448 case USE_ADDRESS:
449 fprintf (file, " address\n");
450 break;
452 case USE_COMPARE:
453 fprintf (file, " compare\n");
454 break;
456 default:
457 gcc_unreachable ();
460 fprintf (file, " in statement ");
461 print_gimple_stmt (file, use->stmt, 0, 0);
462 fprintf (file, "\n");
464 fprintf (file, " at position ");
465 if (use->op_p)
466 print_generic_expr (file, *use->op_p, TDF_SLIM);
467 fprintf (file, "\n");
469 dump_iv (file, use->iv);
471 if (use->related_cands)
473 fprintf (file, " related candidates ");
474 dump_bitmap (file, use->related_cands);
478 /* Dumps information about the uses to FILE. */
480 extern void dump_uses (FILE *, struct ivopts_data *);
481 void
482 dump_uses (FILE *file, struct ivopts_data *data)
484 unsigned i;
485 struct iv_use *use;
487 for (i = 0; i < n_iv_uses (data); i++)
489 use = iv_use (data, i);
491 dump_use (file, use);
492 fprintf (file, "\n");
496 /* Dumps information about induction variable candidate CAND to FILE. */
498 extern void dump_cand (FILE *, struct iv_cand *);
499 void
500 dump_cand (FILE *file, struct iv_cand *cand)
502 struct iv *iv = cand->iv;
504 fprintf (file, "candidate %d%s\n",
505 cand->id, cand->important ? " (important)" : "");
507 if (cand->depends_on)
509 fprintf (file, " depends on ");
510 dump_bitmap (file, cand->depends_on);
513 if (!iv)
515 fprintf (file, " final value replacement\n");
516 return;
519 switch (cand->pos)
521 case IP_NORMAL:
522 fprintf (file, " incremented before exit test\n");
523 break;
525 case IP_BEFORE_USE:
526 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
527 break;
529 case IP_AFTER_USE:
530 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
531 break;
533 case IP_END:
534 fprintf (file, " incremented at end\n");
535 break;
537 case IP_ORIGINAL:
538 fprintf (file, " original biv\n");
539 break;
542 dump_iv (file, iv);
545 /* Returns the info for ssa version VER. */
547 static inline struct version_info *
548 ver_info (struct ivopts_data *data, unsigned ver)
550 return data->version_info + ver;
553 /* Returns the info for ssa name NAME. */
555 static inline struct version_info *
556 name_info (struct ivopts_data *data, tree name)
558 return ver_info (data, SSA_NAME_VERSION (name));
561 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
562 emitted in LOOP. */
564 static bool
565 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
567 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
569 gcc_assert (bb);
571 if (sbb == loop->latch)
572 return true;
574 if (sbb != bb)
575 return false;
577 return stmt == last_stmt (bb);
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc desc;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must know it
	 unconditionally (i.e., without possibility of # of iterations
	 being zero).  Also, we cannot safely work with ssa names that
	 appear in phi nodes on abnormal edges, so that we do not create
	 overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
				     exit, &desc, true)
	  && integer_zerop (desc.may_be_zero)
	  && !contains_abnormal_ssa_name_p (desc.niter))
	niter = desc.niter;
      else
	niter = NULL_TREE;

      *pointer_map_insert (data->niters, exit) = niter;
    }
  else
    niter = (tree) *slot;

  return niter;
}

/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
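
/* For instance (an illustrative sketch, not an exhaustive description):
   for EXPR = &a[0] + 4 * i_1, the POINTER_PLUS_EXPR case recurses into
   &a[0], whose base address is the declaration a, so the base object is
   (void *) &a.  For a pointer constant such as (char *) 0, the
   INTEGER_CST case returns NULL_TREE, meaning no object could be
   determined.  */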
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
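
/* For example (a sketch; the SSA names are illustrative), for a loop
   header containing

     i_1 = PHI <0 (preheader), i_2 (latch)>
     ...
     i_2 = i_1 + 1;

   determine_biv_step returns the step 1 for the phi defining i_1, while
   for a phi whose value does not evolve in the loop it returns
   NULL_TREE.  */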
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = fold_convert (sizetype, step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in a subloop of the current loop or in an
	 irreducible region, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter, TDF_SLIM);
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, so that it is not shown in the dumps, in case the
     caller has not done so already.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
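
/* For instance (an illustrative sketch), for the exit test

     if (n_2 > i_5)

   where i_5 is an iv with step 1 and n_2 is loop invariant, the operands
   are swapped so that *CONTROL_VAR points to i_5 and *BOUND to n_2, and
   true is returned; for a compare of two invariants, false is returned
   with the operands left in place.  */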
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
	return false;
    }

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
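
/* For example (a sketch), for a reference a[i_1] where a is an array of
   4-byte ints and i_1 is an iv with step 1, the index i_1 is replaced by
   its initial value and 4 * 1 = 4 bytes are added to DTA->step, so the
   reference is treated as starting at a[initial value] and advancing by
   4 bytes per iteration.  */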
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
			      precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
	return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
			      precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
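
/* For instance (a sketch): for TOP = i_1 * 8 and BOT = i_1, the MULT_EXPR
   case recurses on i_1, which matches BOT exactly and yields 1, so
   MUL = 8 is stored.  For TOP = 12 and BOT = 8, the INTEGER_CST case
   fails because the division leaves a nonzero remainder.  */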
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

static bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = build_int_cst (sizetype, 0);
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = build_int_cst (sizetype, 0);
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == INDIRECT_REF)
	    {
	      tree tem = gimple_fold_indirect_ref (TREE_OPERAND (*ref, 0));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
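
/* For example (an illustrative sketch), for a store a[i_1] = 0 inside the
   loop, where a is a local array of 4-byte ints and i_1 is an iv starting
   at 0 with step 1, the recorded USE_ADDRESS use has base &a[0] and
   step 4, i.e. the address advances by 4 bytes per iteration.  */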
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
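
/* For example (a sketch): for EXPR = &a[4], where a is an array of 4-byte
   ints, the ARRAY_REF is stripped completely, 16 is stored to *OFFSET and
   (a pointer to) the start of a is returned.  For EXPR = p_1 + 8, the
   constant operand is stripped: p_1 is returned and 8 is stored to
   *OFFSET.  */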
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}

/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to which we should store it.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
2100 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2101 position to POS. If USE is not NULL, the candidate is set as related to
2102 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2103 replacement of the final value of the iv by a direct computation. */
2105 static struct iv_cand *
2106 add_candidate_1 (struct ivopts_data *data,
2107 tree base, tree step, bool important, enum iv_position pos,
2108 struct iv_use *use, gimple incremented_at)
2110 unsigned i;
2111 struct iv_cand *cand = NULL;
2112 tree type, orig_type;
2114 if (base)
2116 orig_type = TREE_TYPE (base);
2117 type = generic_type_for (orig_type);
2118 if (type != orig_type)
2120 base = fold_convert (type, base);
2121 step = fold_convert (type, step);
2125 for (i = 0; i < n_iv_cands (data); i++)
2127 cand = iv_cand (data, i);
2129 if (cand->pos != pos)
2130 continue;
2132 if (cand->incremented_at != incremented_at
2133 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2134 && cand->ainc_use != use))
2135 continue;
2137 if (!cand->iv)
2139 if (!base && !step)
2140 break;
2142 continue;
2145 if (!base && !step)
2146 continue;
2148 if (operand_equal_p (base, cand->iv->base, 0)
2149 && operand_equal_p (step, cand->iv->step, 0))
2150 break;
2153 if (i == n_iv_cands (data))
2155 cand = XCNEW (struct iv_cand);
2156 cand->id = i;
2158 if (!base && !step)
2159 cand->iv = NULL;
2160 else
2161 cand->iv = alloc_iv (base, step);
2163 cand->pos = pos;
2164 if (pos != IP_ORIGINAL && cand->iv)
2166 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2167 cand->var_after = cand->var_before;
2169 cand->important = important;
2170 cand->incremented_at = incremented_at;
2171 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2173 if (step
2174 && TREE_CODE (step) != INTEGER_CST)
2176 fd_ivopts_data = data;
2177 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2180 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2181 cand->ainc_use = use;
2182 else
2183 cand->ainc_use = NULL;
2185 if (dump_file && (dump_flags & TDF_DETAILS))
2186 dump_cand (dump_file, cand);
2189 if (important && !cand->important)
2191 cand->important = true;
2192 if (dump_file && (dump_flags & TDF_DETAILS))
2193 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2196 if (use)
2198 bitmap_set_bit (use->related_cands, i);
2199 if (dump_file && (dump_flags & TDF_DETAILS))
2200 fprintf (dump_file, "Candidate %d is related to use %d\n",
2201 cand->id, use->id);
2204 return cand;
2207 /* Returns true if incrementing the induction variable at the end of the LOOP
2208 is allowed.
2210 The purpose is to avoid splitting the latch edge with a biv increment, thus
2211 creating a jump, possibly confusing other optimization passes and leaving
2212 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2213 is not available (so we do not have a better alternative), or if the latch
2214 edge is already nonempty. */
2216 static bool
2217 allow_ip_end_pos_p (struct loop *loop)
2219 if (!ip_normal_pos (loop))
2220 return true;
2222 if (!empty_block_p (ip_end_pos (loop)))
2223 return true;
2225 return false;
2228 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2229 Important field is set to IMPORTANT. */
2231 static void
2232 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2233 bool important, struct iv_use *use)
2235 basic_block use_bb = gimple_bb (use->stmt);
2236 enum machine_mode mem_mode;
2237 unsigned HOST_WIDE_INT cstepi;
2239 /* If we insert the increment in any position other than the standard
2240 ones, we must ensure that it is incremented once per iteration.
2241 It must not be in an inner nested loop, or one side of an if
2242 statement. */
2243 if (use_bb->loop_father != data->current_loop
2244 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2245 || stmt_could_throw_p (use->stmt)
2246 || !cst_and_fits_in_hwi (step))
2247 return;
2249 cstepi = int_cst_value (step);
2251 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2252 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2253 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2255 enum tree_code code = MINUS_EXPR;
2256 tree new_base;
2257 tree new_step = step;
2259 if (POINTER_TYPE_P (TREE_TYPE (base)))
2261 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2262 code = POINTER_PLUS_EXPR;
2264 else
2265 new_step = fold_convert (TREE_TYPE (base), new_step);
2266 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2267 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2268 use->stmt);
2270 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2271 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2273 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2274 use->stmt);
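/* Worked example, illustrative only: assume a char load through an iv
   pointer with step 1 on a target with HAVE_PRE_INCREMENT.  The
   IP_BEFORE_USE candidate added above has base BASE - 1, so the
   increment placed just before the use restores the original address
   sequence:

     p = base - 1;
     for (...)
       ... = *(++p);    (accesses base, base + 1, ...)

   The IP_AFTER_USE candidate keeps BASE itself and corresponds to the
   post-increment form *(p++).  */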
2278 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2279 position to POS. If USE is not NULL, the candidate is set as related to
2280 it. The candidate computation is scheduled on all available positions. */
2282 static void
2283 add_candidate (struct ivopts_data *data,
2284 tree base, tree step, bool important, struct iv_use *use)
2286 if (ip_normal_pos (data->current_loop))
2287 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2288 if (ip_end_pos (data->current_loop)
2289 && allow_ip_end_pos_p (data->current_loop))
2290 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2292 if (use != NULL && use->type == USE_ADDRESS)
2293 add_autoinc_candidates (data, base, step, important, use);
2296 /* Add a standard "0 + 1 * iteration" iv candidate for a
2297 type with SIZE bits. */
2299 static void
2300 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2301 unsigned int size)
2303 tree type = lang_hooks.types.type_for_size (size, true);
2304 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2305 true, NULL);
2308 /* Adds standard iv candidates. */
2310 static void
2311 add_standard_iv_candidates (struct ivopts_data *data)
2313 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2315 /* The same for a double-integer type if it is still fast enough. */
2316 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2317 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2321 /* Adds candidates based on the old induction variable IV. */
2323 static void
2324 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2326 gimple phi;
2327 tree def;
2328 struct iv_cand *cand;
2330 add_candidate (data, iv->base, iv->step, true, NULL);
2332 /* The same, but with initial value zero. */
2333 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2334 add_candidate (data, size_int (0), iv->step, true, NULL);
2335 else
2336 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2337 iv->step, true, NULL);
2339 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2340 if (gimple_code (phi) == GIMPLE_PHI)
2342 /* Additionally record the possibility of leaving the original iv
2343 untouched. */
2344 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2345 cand = add_candidate_1 (data,
2346 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2347 SSA_NAME_DEF_STMT (def));
2348 cand->var_before = iv->ssa_name;
2349 cand->var_after = def;
2353 /* Adds candidates based on the old induction variables. */
2355 static void
2356 add_old_ivs_candidates (struct ivopts_data *data)
2358 unsigned i;
2359 struct iv *iv;
2360 bitmap_iterator bi;
2362 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2364 iv = ver_info (data, i)->iv;
2365 if (iv && iv->biv_p && !integer_zerop (iv->step))
2366 add_old_iv_candidates (data, iv);
2370 /* Adds candidates based on the value of the induction variable IV and USE. */
2372 static void
2373 add_iv_value_candidates (struct ivopts_data *data,
2374 struct iv *iv, struct iv_use *use)
2376 unsigned HOST_WIDE_INT offset;
2377 tree base;
2378 tree basetype;
2380 add_candidate (data, iv->base, iv->step, false, use);
2382 /* The same, but with initial value zero. Make such a variable important,
2383 since it is generic enough that possibly many uses may be based
2384 on it. */
2385 basetype = TREE_TYPE (iv->base);
2386 if (POINTER_TYPE_P (basetype))
2387 basetype = sizetype;
2388 add_candidate (data, build_int_cst (basetype, 0),
2389 iv->step, true, use);
2391 /* Third, try removing the constant offset. Make sure to even
2392 add a candidate for &a[0] vs. (T *)&a. */
2393 base = strip_offset (iv->base, &offset);
2394 if (offset
2395 || base != iv->base)
2396 add_candidate (data, base, iv->step, false, use);
2399 /* Adds candidates based on the uses. */
2401 static void
2402 add_derived_ivs_candidates (struct ivopts_data *data)
2404 unsigned i;
2406 for (i = 0; i < n_iv_uses (data); i++)
2408 struct iv_use *use = iv_use (data, i);
2410 if (!use)
2411 continue;
2413 switch (use->type)
2415 case USE_NONLINEAR_EXPR:
2416 case USE_COMPARE:
2417 case USE_ADDRESS:
2418 /* Just add the ivs based on the value of the iv used here. */
2419 add_iv_value_candidates (data, use->iv, use);
2420 break;
2422 default:
2423 gcc_unreachable ();
2428 /* Record important candidates and add them to related_cands bitmaps
2429 if needed. */
2431 static void
2432 record_important_candidates (struct ivopts_data *data)
2434 unsigned i;
2435 struct iv_use *use;
2437 for (i = 0; i < n_iv_cands (data); i++)
2439 struct iv_cand *cand = iv_cand (data, i);
2441 if (cand->important)
2442 bitmap_set_bit (data->important_candidates, i);
2445 data->consider_all_candidates = (n_iv_cands (data)
2446 <= CONSIDER_ALL_CANDIDATES_BOUND);
2448 if (data->consider_all_candidates)
2450 /* We will not need "related_cands" bitmaps in this case,
2451 so release them to decrease peak memory consumption. */
2452 for (i = 0; i < n_iv_uses (data); i++)
2454 use = iv_use (data, i);
2455 BITMAP_FREE (use->related_cands);
2458 else
2460 /* Add important candidates to the related_cands bitmaps. */
2461 for (i = 0; i < n_iv_uses (data); i++)
2462 bitmap_ior_into (iv_use (data, i)->related_cands,
2463 data->important_candidates);
2467 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2468 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2469 we allocate a simple list to every use. */
2471 static void
2472 alloc_use_cost_map (struct ivopts_data *data)
2474 unsigned i, size, s, j;
2476 for (i = 0; i < n_iv_uses (data); i++)
2478 struct iv_use *use = iv_use (data, i);
2479 bitmap_iterator bi;
2481 if (data->consider_all_candidates)
2482 size = n_iv_cands (data);
2483 else
2485 s = 0;
2486 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2488 s++;
2491 /* Round up to the nearest power of two, so that taking the modulo by it is fast; see the sketch after this function. */
2492 for (size = 1; size < s; size <<= 1)
2493 continue;
2496 use->n_map_members = size;
2497 use->cost_map = XCNEWVEC (struct cost_pair, size);
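/* Illustrative only: rounding the per-use map size up to a power of two
   lets set_use_iv_cost and get_use_iv_cost below reduce a candidate id
   modulo the size with a mask instead of a division, since for
   SIZE == 2^k

     id % size == (id & (size - 1)).

   E.g. s == 5 related candidates yields size == 8, and candidate id 13
   starts probing at slot 13 & 7 == 5.  */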
2501 /* Returns description of computation cost of expression whose runtime
2502 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2504 static comp_cost
2505 new_cost (unsigned runtime, unsigned complexity)
2507 comp_cost cost;
2509 cost.cost = runtime;
2510 cost.complexity = complexity;
2512 return cost;
2515 /* Adds costs COST1 and COST2. */
2517 static comp_cost
2518 add_costs (comp_cost cost1, comp_cost cost2)
2520 cost1.cost += cost2.cost;
2521 cost1.complexity += cost2.complexity;
2523 return cost1;
2525 /* Subtracts costs COST1 and COST2. */
2527 static comp_cost
2528 sub_costs (comp_cost cost1, comp_cost cost2)
2530 cost1.cost -= cost2.cost;
2531 cost1.complexity -= cost2.complexity;
2533 return cost1;
2536 /* Returns a negative number if COST1 < COST2, a positive number if
2537 COST1 > COST2, and 0 if COST1 = COST2. */
2539 static int
2540 compare_costs (comp_cost cost1, comp_cost cost2)
2542 if (cost1.cost == cost2.cost)
2543 return cost1.complexity - cost2.complexity;
2545 return cost1.cost - cost2.cost;
2548 /* Returns true if COST is infinite. */
2550 static bool
2551 infinite_cost_p (comp_cost cost)
2553 return cost.cost == INFTY;
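/* Illustrative only: costs are ordered by runtime first, with complexity
   acting merely as a tie-breaker:

     compare_costs (new_cost (4, 1), new_cost (4, 2)) < 0
     compare_costs (new_cost (3, 9), new_cost (4, 0)) < 0

   so a cheaper but more complex expression still wins.  */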
2556 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2557 on invariants DEPENDS_ON and that the value used in expressing it
2558 is VALUE. */
2560 static void
2561 set_use_iv_cost (struct ivopts_data *data,
2562 struct iv_use *use, struct iv_cand *cand,
2563 comp_cost cost, bitmap depends_on, tree value)
2565 unsigned i, s;
2567 if (infinite_cost_p (cost))
2569 BITMAP_FREE (depends_on);
2570 return;
2573 if (data->consider_all_candidates)
2575 use->cost_map[cand->id].cand = cand;
2576 use->cost_map[cand->id].cost = cost;
2577 use->cost_map[cand->id].depends_on = depends_on;
2578 use->cost_map[cand->id].value = value;
2579 return;
2582 /* n_map_members is a power of two, so this computes modulo. */
2583 s = cand->id & (use->n_map_members - 1);
2584 for (i = s; i < use->n_map_members; i++)
2585 if (!use->cost_map[i].cand)
2586 goto found;
2587 for (i = 0; i < s; i++)
2588 if (!use->cost_map[i].cand)
2589 goto found;
2591 gcc_unreachable ();
2593 found:
2594 use->cost_map[i].cand = cand;
2595 use->cost_map[i].cost = cost;
2596 use->cost_map[i].depends_on = depends_on;
2597 use->cost_map[i].value = value;
2600 /* Gets cost of (USE, CANDIDATE) pair. */
2602 static struct cost_pair *
2603 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2604 struct iv_cand *cand)
2606 unsigned i, s;
2607 struct cost_pair *ret;
2609 if (!cand)
2610 return NULL;
2612 if (data->consider_all_candidates)
2614 ret = use->cost_map + cand->id;
2615 if (!ret->cand)
2616 return NULL;
2618 return ret;
2621 /* n_map_members is a power of two, so this computes modulo. */
2622 s = cand->id & (use->n_map_members - 1);
2623 for (i = s; i < use->n_map_members; i++)
2624 if (use->cost_map[i].cand == cand)
2625 return use->cost_map + i;
2627 for (i = 0; i < s; i++)
2628 if (use->cost_map[i].cand == cand)
2629 return use->cost_map + i;
2631 return NULL;
2634 /* Returns estimate on cost of computing SEQ. */
2636 static unsigned
2637 seq_cost (rtx seq, bool speed)
2639 unsigned cost = 0;
2640 rtx set;
2642 for (; seq; seq = NEXT_INSN (seq))
2644 set = single_set (seq);
2645 if (set)
2646 cost += rtx_cost (set, SET, speed);
2647 else
2648 cost++;
2651 return cost;
2654 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2655 static rtx
2656 produce_memory_decl_rtl (tree obj, int *regno)
2658 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2659 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2660 rtx x;
2662 gcc_assert (obj);
2663 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2665 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2666 x = gen_rtx_SYMBOL_REF (address_mode, name);
2667 SET_SYMBOL_REF_DECL (x, obj);
2668 x = gen_rtx_MEM (DECL_MODE (obj), x);
2669 set_mem_addr_space (x, as);
2670 targetm.encode_section_info (obj, x, true);
2672 else
2674 x = gen_raw_REG (address_mode, (*regno)++);
2675 x = gen_rtx_MEM (DECL_MODE (obj), x);
2676 set_mem_addr_space (x, as);
2679 return x;
2682 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2683 walk_tree. DATA contains the actual fake register number. */
2685 static tree
2686 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2688 tree obj = NULL_TREE;
2689 rtx x = NULL_RTX;
2690 int *regno = (int *) data;
2692 switch (TREE_CODE (*expr_p))
2694 case ADDR_EXPR:
2695 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2696 handled_component_p (*expr_p);
2697 expr_p = &TREE_OPERAND (*expr_p, 0))
2698 continue;
2699 obj = *expr_p;
2700 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2701 x = produce_memory_decl_rtl (obj, regno);
2702 break;
2704 case SSA_NAME:
2705 *ws = 0;
2706 obj = SSA_NAME_VAR (*expr_p);
2707 if (!DECL_RTL_SET_P (obj))
2708 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2709 break;
2711 case VAR_DECL:
2712 case PARM_DECL:
2713 case RESULT_DECL:
2714 *ws = 0;
2715 obj = *expr_p;
2717 if (DECL_RTL_SET_P (obj))
2718 break;
2720 if (DECL_MODE (obj) == BLKmode)
2721 x = produce_memory_decl_rtl (obj, regno);
2722 else
2723 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2725 break;
2727 default:
2728 break;
2731 if (x)
2733 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2734 SET_DECL_RTL (obj, x);
2737 return NULL_TREE;
2740 /* Determines cost of the computation of EXPR. */
2742 static unsigned
2743 computation_cost (tree expr, bool speed)
2745 rtx seq, rslt;
2746 tree type = TREE_TYPE (expr);
2747 unsigned cost;
2748 /* Avoid using hard regs in ways which may be unsupported. */
2749 int regno = LAST_VIRTUAL_REGISTER + 1;
2750 struct cgraph_node *node = cgraph_node (current_function_decl);
2751 enum node_frequency real_frequency = node->frequency;
2753 node->frequency = NODE_FREQUENCY_NORMAL;
2754 crtl->maybe_hot_insn_p = speed;
2755 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2756 start_sequence ();
2757 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2758 seq = get_insns ();
2759 end_sequence ();
2760 default_rtl_profile ();
2761 node->frequency = real_frequency;
2763 cost = seq_cost (seq, speed);
2764 if (MEM_P (rslt))
2765 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2766 TYPE_ADDR_SPACE (type), speed);
2768 return cost;
2771 /* Returns variable containing the value of candidate CAND at statement AT. */
2773 static tree
2774 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2776 if (stmt_after_increment (loop, cand, stmt))
2777 return cand->var_after;
2778 else
2779 return cand->var_before;
2782 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2783 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2785 int
2786 tree_int_cst_sign_bit (const_tree t)
2788 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2789 unsigned HOST_WIDE_INT w;
2791 if (bitno < HOST_BITS_PER_WIDE_INT)
2792 w = TREE_INT_CST_LOW (t);
2793 else
2795 w = TREE_INT_CST_HIGH (t);
2796 bitno -= HOST_BITS_PER_WIDE_INT;
2799 return (w >> bitno) & 1;
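/* Worked example, illustrative only: for a 16-bit signed constant the
   precision is 16, so BITNO == 15 and the bit comes from the low word:

     t == -1 (0xffff)  ->  (0xffff >> 15) & 1 == 1
     t ==  5 (0x0005)  ->  (0x0005 >> 15) & 1 == 0

   The high word is consulted only when the precision exceeds
   HOST_BITS_PER_WIDE_INT.  */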
2802 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2803 same precision that is at least as wide as the precision of TYPE, stores
2804 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2805 type of A and B. */
2807 static tree
2808 determine_common_wider_type (tree *a, tree *b)
2810 tree wider_type = NULL;
2811 tree suba, subb;
2812 tree atype = TREE_TYPE (*a);
2814 if (CONVERT_EXPR_P (*a))
2816 suba = TREE_OPERAND (*a, 0);
2817 wider_type = TREE_TYPE (suba);
2818 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2819 return atype;
2821 else
2822 return atype;
2824 if (CONVERT_EXPR_P (*b))
2826 subb = TREE_OPERAND (*b, 0);
2827 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2828 return atype;
2830 else
2831 return atype;
2833 *a = suba;
2834 *b = subb;
2835 return wider_type;
2838 /* Determines the expression by which USE is expressed from induction variable
2839 CAND at statement AT in LOOP. The expression is stored in a decomposed
2840 form into AFF. Returns false if USE cannot be expressed using CAND. */
2842 static bool
2843 get_computation_aff (struct loop *loop,
2844 struct iv_use *use, struct iv_cand *cand, gimple at,
2845 struct affine_tree_combination *aff)
2847 tree ubase = use->iv->base;
2848 tree ustep = use->iv->step;
2849 tree cbase = cand->iv->base;
2850 tree cstep = cand->iv->step, cstep_common;
2851 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2852 tree common_type, var;
2853 tree uutype;
2854 aff_tree cbase_aff, var_aff;
2855 double_int rat;
2857 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2859 /* We do not have a precision to express the values of use. */
2860 return false;
2863 var = var_at_stmt (loop, cand, at);
2864 uutype = unsigned_type_for (utype);
2866 /* If the conversion is not a noop, perform it. */
2867 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2869 cstep = fold_convert (uutype, cstep);
2870 cbase = fold_convert (uutype, cbase);
2871 var = fold_convert (uutype, var);
2874 if (!constant_multiple_of (ustep, cstep, &rat))
2875 return false;
2877 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2878 type, we achieve better folding by computing their difference in this
2879 wider type, and cast the result to UUTYPE. We do not need to worry about
2880 overflows, as all the arithmetics will in the end be performed in UUTYPE
2881 anyway. */
2882 common_type = determine_common_wider_type (&ubase, &cbase);
2884 /* use = ubase - ratio * cbase + ratio * var. */
2885 tree_to_aff_combination (ubase, common_type, aff);
2886 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2887 tree_to_aff_combination (var, uutype, &var_aff);
2889 /* We need to shift the value if we are after the increment. */
2890 if (stmt_after_increment (loop, cand, at))
2892 aff_tree cstep_aff;
2894 if (common_type != uutype)
2895 cstep_common = fold_convert (common_type, cstep);
2896 else
2897 cstep_common = cstep;
2899 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2900 aff_combination_add (&cbase_aff, &cstep_aff);
2903 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2904 aff_combination_add (aff, &cbase_aff);
2905 if (common_type != uutype)
2906 aff_combination_convert (aff, uutype);
2908 aff_combination_scale (&var_aff, rat);
2909 aff_combination_add (aff, &var_aff);
2911 return true;
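/* Worked example, illustrative only: let the use have base 8 and step 4
   and the candidate base 0 and step 2.  Then RAT == 2 and the rewrite

     use = ubase - rat * cbase + rat * var = 8 - 2*0 + 2*var

   recovers the use value 8 + 4*i at iteration i from the candidate
   value var == 2*i.  */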
2914 /* Determines the expression by which USE is expressed from induction variable
2915 CAND at statement AT in LOOP. The computation is unshared. */
2917 static tree
2918 get_computation_at (struct loop *loop,
2919 struct iv_use *use, struct iv_cand *cand, gimple at)
2921 aff_tree aff;
2922 tree type = TREE_TYPE (use->iv->base);
2924 if (!get_computation_aff (loop, use, cand, at, &aff))
2925 return NULL_TREE;
2926 unshare_aff_combination (&aff);
2927 return fold_convert (type, aff_combination_to_tree (&aff));
2930 /* Determines the expression by which USE is expressed from induction variable
2931 CAND in LOOP. The computation is unshared. */
2933 static tree
2934 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2936 return get_computation_at (loop, use, cand, use->stmt);
2939 /* Adjust the cost COST for being in loop setup rather than loop body.
2940 If we're optimizing for space, the loop setup overhead is constant;
2941 if we're optimizing for speed, amortize it over the expected number of iterations. */
2942 static unsigned
2943 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
2945 if (cost == INFTY)
2946 return cost;
2947 else if (optimize_loop_for_speed_p (data->current_loop))
2948 return cost / AVG_LOOP_NITER (data->current_loop);
2949 else
2950 return cost;
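/* Illustrative only, assuming AVG_LOOP_NITER yields 5 for this loop (an
   assumption for the example): a setup cost of 10 paid once before the
   loop is accounted as 10 / 5 == 2 per-iteration units when optimizing
   for speed, but as the full 10 when optimizing for size, where code
   emitted outside the loop costs as much as code inside it.  */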
2953 /* Returns cost of addition in MODE. */
2955 static unsigned
2956 add_cost (enum machine_mode mode, bool speed)
2958 static unsigned costs[NUM_MACHINE_MODES];
2959 rtx seq;
2960 unsigned cost;
2962 if (costs[mode])
2963 return costs[mode];
2965 start_sequence ();
2966 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2967 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2968 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2969 NULL_RTX);
2970 seq = get_insns ();
2971 end_sequence ();
2973 cost = seq_cost (seq, speed);
2974 if (!cost)
2975 cost = 1;
2977 costs[mode] = cost;
2979 if (dump_file && (dump_flags & TDF_DETAILS))
2980 fprintf (dump_file, "Addition in %s costs %d\n",
2981 GET_MODE_NAME (mode), cost);
2982 return cost;
2985 /* Entry in a hashtable of already known costs for multiplication. */
2986 struct mbc_entry
2988 HOST_WIDE_INT cst; /* The constant to multiply by. */
2989 enum machine_mode mode; /* In mode. */
2990 unsigned cost; /* The cost. */
2993 /* Counts hash value for the ENTRY. */
2995 static hashval_t
2996 mbc_entry_hash (const void *entry)
2998 const struct mbc_entry *e = (const struct mbc_entry *) entry;
3000 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3003 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3005 static int
3006 mbc_entry_eq (const void *entry1, const void *entry2)
3008 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3009 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3011 return (e1->mode == e2->mode
3012 && e1->cst == e2->cst);
3015 /* Returns cost of multiplication by constant CST in MODE. */
3017 unsigned
3018 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3020 static htab_t costs;
3021 struct mbc_entry **cached, act;
3022 rtx seq;
3023 unsigned cost;
3025 if (!costs)
3026 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3028 act.mode = mode;
3029 act.cst = cst;
3030 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3031 if (*cached)
3032 return (*cached)->cost;
3034 *cached = XNEW (struct mbc_entry);
3035 (*cached)->mode = mode;
3036 (*cached)->cst = cst;
3038 start_sequence ();
3039 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3040 gen_int_mode (cst, mode), NULL_RTX, 0);
3041 seq = get_insns ();
3042 end_sequence ();
3044 cost = seq_cost (seq, speed);
3046 if (dump_file && (dump_flags & TDF_DETAILS))
3047 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3048 (int) cst, GET_MODE_NAME (mode), cost);
3050 (*cached)->cost = cost;
3052 return cost;
3055 /* Returns true if multiplying by RATIO is allowed in an address. Tests the
3056 validity for a memory reference accessing memory of mode MODE in
3057 address space AS. */
3059 DEF_VEC_P (sbitmap);
3060 DEF_VEC_ALLOC_P (sbitmap, heap);
3062 bool
3063 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3064 addr_space_t as)
3066 #define MAX_RATIO 128
3067 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3068 static VEC (sbitmap, heap) *valid_mult_list;
3069 sbitmap valid_mult;
3071 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3072 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3074 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3075 if (!valid_mult)
3077 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3078 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3079 rtx addr;
3080 HOST_WIDE_INT i;
3082 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3083 sbitmap_zero (valid_mult);
3084 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3085 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3087 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3088 if (memory_address_addr_space_p (mode, addr, as))
3089 SET_BIT (valid_mult, i + MAX_RATIO);
3092 if (dump_file && (dump_flags & TDF_DETAILS))
3094 fprintf (dump_file, " allowed multipliers:");
3095 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3096 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3097 fprintf (dump_file, " %d", (int) i);
3098 fprintf (dump_file, "\n");
3099 fprintf (dump_file, "\n");
3102 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3105 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3106 return false;
3108 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3111 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3112 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3113 variable is omitted. Compute the cost for a memory reference that accesses
3114 a memory location of mode MEM_MODE in address space AS.
3116 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3117 size of MEM_MODE / RATIO) is available. To make this determination, we
3118 look at the size of the increment to be made, which is given in CSTEP.
3119 CSTEP may be zero if the step is unknown.
3120 STMT_AFTER_INC is true iff the statement we're looking at is after the
3121 increment of the original biv.
3123 TODO -- there must be some better way. This all is quite crude. */
3125 typedef struct
3127 HOST_WIDE_INT min_offset, max_offset;
3128 unsigned costs[2][2][2][2];
3129 } *address_cost_data;
3131 DEF_VEC_P (address_cost_data);
3132 DEF_VEC_ALLOC_P (address_cost_data, heap);
3134 static comp_cost
3135 get_address_cost (bool symbol_present, bool var_present,
3136 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3137 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3138 addr_space_t as, bool speed,
3139 bool stmt_after_inc, bool *may_autoinc)
3141 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3142 static VEC(address_cost_data, heap) *address_cost_data_list;
3143 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3144 address_cost_data data;
3145 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3146 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3147 unsigned cost, acost, complexity;
3148 bool offset_p, ratio_p, autoinc;
3149 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3150 unsigned HOST_WIDE_INT mask;
3151 unsigned bits;
3153 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3154 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3155 data_index + 1);
3157 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3158 if (!data)
3160 HOST_WIDE_INT i;
3161 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3162 HOST_WIDE_INT rat, off;
3163 int old_cse_not_expected;
3164 unsigned sym_p, var_p, off_p, rat_p, add_c;
3165 rtx seq, addr, base;
3166 rtx reg0, reg1;
3168 data = (address_cost_data) xcalloc (1, sizeof (*data));
3170 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3172 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3173 for (i = start; i <= 1 << 20; i <<= 1)
3175 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3176 if (!memory_address_addr_space_p (mem_mode, addr, as))
3177 break;
3179 data->max_offset = i == start ? 0 : i >> 1;
3180 off = data->max_offset;
3182 for (i = start; i <= 1 << 20; i <<= 1)
3184 XEXP (addr, 1) = gen_int_mode (-i, address_mode);
3185 if (!memory_address_addr_space_p (mem_mode, addr, as))
3186 break;
3188 data->min_offset = i == start ? 0 : -(i >> 1);
3190 if (dump_file && (dump_flags & TDF_DETAILS))
3192 fprintf (dump_file, "get_address_cost:\n");
3193 fprintf (dump_file, " min offset %s %d\n",
3194 GET_MODE_NAME (mem_mode),
3195 (int) data->min_offset);
3196 fprintf (dump_file, " max offset %s %d\n",
3197 GET_MODE_NAME (mem_mode),
3198 (int) data->max_offset);
3201 rat = 1;
3202 for (i = 2; i <= MAX_RATIO; i++)
3203 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3205 rat = i;
3206 break;
3209 /* Compute the cost of various addressing modes. */
3210 acost = 0;
3211 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3212 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3214 if (HAVE_PRE_DECREMENT)
3216 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3217 has_predec[mem_mode]
3218 = memory_address_addr_space_p (mem_mode, addr, as);
3220 if (HAVE_POST_DECREMENT)
3222 addr = gen_rtx_POST_DEC (address_mode, reg0);
3223 has_postdec[mem_mode]
3224 = memory_address_addr_space_p (mem_mode, addr, as);
3226 if (HAVE_PRE_INCREMENT)
3228 addr = gen_rtx_PRE_INC (address_mode, reg0);
3229 has_preinc[mem_mode]
3230 = memory_address_addr_space_p (mem_mode, addr, as);
3232 if (HAVE_POST_INCREMENT)
3234 addr = gen_rtx_POST_INC (address_mode, reg0);
3235 has_postinc[mem_mode]
3236 = memory_address_addr_space_p (mem_mode, addr, as);
3238 for (i = 0; i < 16; i++)
3240 sym_p = i & 1;
3241 var_p = (i >> 1) & 1;
3242 off_p = (i >> 2) & 1;
3243 rat_p = (i >> 3) & 1;
3245 addr = reg0;
3246 if (rat_p)
3247 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3248 gen_int_mode (rat, address_mode));
3250 if (var_p)
3251 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3253 if (sym_p)
3255 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3256 /* ??? We can run into trouble with some backends by presenting
3257 it with symbols which haven't been properly passed through
3258 targetm.encode_section_info. By setting the local bit, we
3259 enhance the probability of things working. */
3260 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3262 if (off_p)
3263 base = gen_rtx_fmt_e (CONST, address_mode,
3264 gen_rtx_fmt_ee
3265 (PLUS, address_mode, base,
3266 gen_int_mode (off, address_mode)));
3268 else if (off_p)
3269 base = gen_int_mode (off, address_mode);
3270 else
3271 base = NULL_RTX;
3273 if (base)
3274 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3276 start_sequence ();
3277 /* To avoid splitting addressing modes, pretend that no cse will
3278 follow. */
3279 old_cse_not_expected = cse_not_expected;
3280 cse_not_expected = true;
3281 addr = memory_address_addr_space (mem_mode, addr, as);
3282 cse_not_expected = old_cse_not_expected;
3283 seq = get_insns ();
3284 end_sequence ();
3286 acost = seq_cost (seq, speed);
3287 acost += address_cost (addr, mem_mode, as, speed);
3289 if (!acost)
3290 acost = 1;
3291 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3294 /* On some targets, it is quite expensive to load a symbol into a register,
3295 which makes addresses that contain symbols look much more expensive.
3296 However, the symbol will have to be loaded in any case before the
3297 loop (and quite likely we have it in a register already), so it does not
3298 make much sense to penalize them too heavily. So make some final
3299 tweaks for the SYMBOL_PRESENT modes:
3301 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3302 var is cheaper, use this mode with small penalty.
3303 If VAR_PRESENT is true, try whether the mode with
3304 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
3305 if this is the case, use it. */
3306 add_c = add_cost (address_mode, speed);
3307 for (i = 0; i < 8; i++)
3309 var_p = i & 1;
3310 off_p = (i >> 1) & 1;
3311 rat_p = (i >> 2) & 1;
3313 acost = data->costs[0][1][off_p][rat_p] + 1;
3314 if (var_p)
3315 acost += add_c;
3317 if (acost < data->costs[1][var_p][off_p][rat_p])
3318 data->costs[1][var_p][off_p][rat_p] = acost;
3321 if (dump_file && (dump_flags & TDF_DETAILS))
3323 fprintf (dump_file, "Address costs:\n");
3325 for (i = 0; i < 16; i++)
3327 sym_p = i & 1;
3328 var_p = (i >> 1) & 1;
3329 off_p = (i >> 2) & 1;
3330 rat_p = (i >> 3) & 1;
3332 fprintf (dump_file, " ");
3333 if (sym_p)
3334 fprintf (dump_file, "sym + ");
3335 if (var_p)
3336 fprintf (dump_file, "var + ");
3337 if (off_p)
3338 fprintf (dump_file, "cst + ");
3339 if (rat_p)
3340 fprintf (dump_file, "rat * ");
3342 acost = data->costs[sym_p][var_p][off_p][rat_p];
3343 fprintf (dump_file, "index costs %d\n", acost);
3345 if (has_predec[mem_mode] || has_postdec[mem_mode]
3346 || has_preinc[mem_mode] || has_postinc[mem_mode])
3347 fprintf (dump_file, " May include autoinc/dec\n");
3348 fprintf (dump_file, "\n");
3351 VEC_replace (address_cost_data, address_cost_data_list,
3352 data_index, data);
3355 bits = GET_MODE_BITSIZE (address_mode);
3356 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3357 offset &= mask;
3358 if ((offset >> (bits - 1) & 1))
3359 offset |= ~mask;
3360 s_offset = offset;
3362 autoinc = false;
3363 msize = GET_MODE_SIZE (mem_mode);
3364 autoinc_offset = offset;
3365 if (stmt_after_inc)
3366 autoinc_offset += ratio * cstep;
3367 if (symbol_present || var_present || ratio != 1)
3368 autoinc = false;
3369 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3370 && msize == cstep)
3371 || (has_postdec[mem_mode] && autoinc_offset == 0
3372 && msize == -cstep)
3373 || (has_preinc[mem_mode] && autoinc_offset == msize
3374 && msize == cstep)
3375 || (has_predec[mem_mode] && autoinc_offset == -msize
3376 && msize == -cstep))
3377 autoinc = true;
3379 cost = 0;
3380 offset_p = (s_offset != 0
3381 && data->min_offset <= s_offset
3382 && s_offset <= data->max_offset);
3383 ratio_p = (ratio != 1
3384 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3386 if (ratio != 1 && !ratio_p)
3387 cost += multiply_by_cost (ratio, address_mode, speed);
3389 if (s_offset && !offset_p && !symbol_present)
3390 cost += add_cost (address_mode, speed);
3392 if (may_autoinc)
3393 *may_autoinc = autoinc;
3394 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3395 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3396 return new_cost (cost + acost, complexity);
3399 /* Estimates cost of forcing expression EXPR into a variable. */
3401 static comp_cost
3402 force_expr_to_var_cost (tree expr, bool speed)
3404 static bool costs_initialized = false;
3405 static unsigned integer_cost [2];
3406 static unsigned symbol_cost [2];
3407 static unsigned address_cost [2];
3408 tree op0, op1;
3409 comp_cost cost0, cost1, cost;
3410 enum machine_mode mode;
3412 if (!costs_initialized)
3414 tree type = build_pointer_type (integer_type_node);
3415 tree var, addr;
3416 rtx x;
3417 int i;
3419 var = create_tmp_var_raw (integer_type_node, "test_var");
3420 TREE_STATIC (var) = 1;
3421 x = produce_memory_decl_rtl (var, NULL);
3422 SET_DECL_RTL (var, x);
3424 addr = build1 (ADDR_EXPR, type, var);
3427 for (i = 0; i < 2; i++)
3429 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3430 2000), i);
3432 symbol_cost[i] = computation_cost (addr, i) + 1;
3434 address_cost[i]
3435 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3436 addr,
3437 build_int_cst (sizetype, 2000)), i) + 1;
3438 if (dump_file && (dump_flags & TDF_DETAILS))
3440 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3441 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3442 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3443 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3444 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3445 fprintf (dump_file, "\n");
3449 costs_initialized = true;
3452 STRIP_NOPS (expr);
3454 if (SSA_VAR_P (expr))
3455 return zero_cost;
3457 if (is_gimple_min_invariant (expr))
3459 if (TREE_CODE (expr) == INTEGER_CST)
3460 return new_cost (integer_cost [speed], 0);
3462 if (TREE_CODE (expr) == ADDR_EXPR)
3464 tree obj = TREE_OPERAND (expr, 0);
3466 if (TREE_CODE (obj) == VAR_DECL
3467 || TREE_CODE (obj) == PARM_DECL
3468 || TREE_CODE (obj) == RESULT_DECL)
3469 return new_cost (symbol_cost [speed], 0);
3472 return new_cost (address_cost [speed], 0);
3475 switch (TREE_CODE (expr))
3477 case POINTER_PLUS_EXPR:
3478 case PLUS_EXPR:
3479 case MINUS_EXPR:
3480 case MULT_EXPR:
3481 op0 = TREE_OPERAND (expr, 0);
3482 op1 = TREE_OPERAND (expr, 1);
3483 STRIP_NOPS (op0);
3484 STRIP_NOPS (op1);
3486 if (is_gimple_val (op0))
3487 cost0 = zero_cost;
3488 else
3489 cost0 = force_expr_to_var_cost (op0, speed);
3491 if (is_gimple_val (op1))
3492 cost1 = zero_cost;
3493 else
3494 cost1 = force_expr_to_var_cost (op1, speed);
3496 break;
3498 case NEGATE_EXPR:
3499 op0 = TREE_OPERAND (expr, 0);
3500 STRIP_NOPS (op0);
3501 op1 = NULL_TREE;
3503 if (is_gimple_val (op0))
3504 cost0 = zero_cost;
3505 else
3506 cost0 = force_expr_to_var_cost (op0, speed);
3508 cost1 = zero_cost;
3509 break;
3511 default:
3512 /* Just an arbitrary value, FIXME. */
3513 return new_cost (target_spill_cost[speed], 0);
3516 mode = TYPE_MODE (TREE_TYPE (expr));
3517 switch (TREE_CODE (expr))
3519 case POINTER_PLUS_EXPR:
3520 case PLUS_EXPR:
3521 case MINUS_EXPR:
3522 case NEGATE_EXPR:
3523 cost = new_cost (add_cost (mode, speed), 0);
3524 break;
3526 case MULT_EXPR:
3527 if (cst_and_fits_in_hwi (op0))
3528 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3529 else if (cst_and_fits_in_hwi (op1))
3530 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3531 else
3532 return new_cost (target_spill_cost [speed], 0);
3533 break;
3535 default:
3536 gcc_unreachable ();
3539 cost = add_costs (cost, cost0);
3540 cost = add_costs (cost, cost1);
3542 /* Bound the cost by target_spill_cost. The parts of complicated
3543 computations often are either loop invariant or at least can
3544 be shared between several iv uses, so letting this grow without
3545 limits would not give reasonable results. */
3546 if (cost.cost > (int) target_spill_cost [speed])
3547 cost.cost = target_spill_cost [speed];
3549 return cost;
3552 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3553 invariants the computation depends on. */
3555 static comp_cost
3556 force_var_cost (struct ivopts_data *data,
3557 tree expr, bitmap *depends_on)
3559 if (depends_on)
3561 fd_ivopts_data = data;
3562 walk_tree (&expr, find_depends, depends_on, NULL);
3565 return force_expr_to_var_cost (expr, data->speed);
3568 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3569 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3570 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3571 invariants the computation depends on. */
3573 static comp_cost
3574 split_address_cost (struct ivopts_data *data,
3575 tree addr, bool *symbol_present, bool *var_present,
3576 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3578 tree core;
3579 HOST_WIDE_INT bitsize;
3580 HOST_WIDE_INT bitpos;
3581 tree toffset;
3582 enum machine_mode mode;
3583 int unsignedp, volatilep;
3585 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3586 &unsignedp, &volatilep, false);
3588 if (toffset != 0
3589 || bitpos % BITS_PER_UNIT != 0
3590 || TREE_CODE (core) != VAR_DECL)
3592 *symbol_present = false;
3593 *var_present = true;
3594 fd_ivopts_data = data;
3595 walk_tree (&addr, find_depends, depends_on, NULL);
3596 return new_cost (target_spill_cost[data->speed], 0);
3599 *offset += bitpos / BITS_PER_UNIT;
3600 if (TREE_STATIC (core)
3601 || DECL_EXTERNAL (core))
3603 *symbol_present = true;
3604 *var_present = false;
3605 return zero_cost;
3608 *symbol_present = false;
3609 *var_present = true;
3610 return zero_cost;
3613 /* Estimates cost of expressing difference of addresses E1 - E2 as
3614 var + symbol + offset. The value of offset is added to OFFSET,
3615 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3616 part is missing. DEPENDS_ON is a set of the invariants the computation
3617 depends on. */
3619 static comp_cost
3620 ptr_difference_cost (struct ivopts_data *data,
3621 tree e1, tree e2, bool *symbol_present, bool *var_present,
3622 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3624 HOST_WIDE_INT diff = 0;
3625 aff_tree aff_e1, aff_e2;
3626 tree type;
3628 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3630 if (ptr_difference_const (e1, e2, &diff))
3632 *offset += diff;
3633 *symbol_present = false;
3634 *var_present = false;
3635 return zero_cost;
3638 if (integer_zerop (e2))
3639 return split_address_cost (data, TREE_OPERAND (e1, 0),
3640 symbol_present, var_present, offset, depends_on);
3642 *symbol_present = false;
3643 *var_present = true;
3645 type = signed_type_for (TREE_TYPE (e1));
3646 tree_to_aff_combination (e1, type, &aff_e1);
3647 tree_to_aff_combination (e2, type, &aff_e2);
3648 aff_combination_scale (&aff_e2, double_int_minus_one);
3649 aff_combination_add (&aff_e1, &aff_e2);
3651 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3654 /* Estimates cost of expressing difference E1 - E2 as
3655 var + symbol + offset. The value of offset is added to OFFSET,
3656 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3657 part is missing. DEPENDS_ON is a set of the invariants the computation
3658 depends on. */
3660 static comp_cost
3661 difference_cost (struct ivopts_data *data,
3662 tree e1, tree e2, bool *symbol_present, bool *var_present,
3663 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3665 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3666 unsigned HOST_WIDE_INT off1, off2;
3667 aff_tree aff_e1, aff_e2;
3668 tree type;
3670 e1 = strip_offset (e1, &off1);
3671 e2 = strip_offset (e2, &off2);
3672 *offset += off1 - off2;
3674 STRIP_NOPS (e1);
3675 STRIP_NOPS (e2);
3677 if (TREE_CODE (e1) == ADDR_EXPR)
3678 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3679 offset, depends_on);
3680 *symbol_present = false;
3682 if (operand_equal_p (e1, e2, 0))
3684 *var_present = false;
3685 return zero_cost;
3688 *var_present = true;
3690 if (integer_zerop (e2))
3691 return force_var_cost (data, e1, depends_on);
3693 if (integer_zerop (e1))
3695 comp_cost cost = force_var_cost (data, e2, depends_on);
3696 cost.cost += multiply_by_cost (-1, mode, data->speed);
3697 return cost;
3700 type = signed_type_for (TREE_TYPE (e1));
3701 tree_to_aff_combination (e1, type, &aff_e1);
3702 tree_to_aff_combination (e2, type, &aff_e2);
3703 aff_combination_scale (&aff_e2, double_int_minus_one);
3704 aff_combination_add (&aff_e1, &aff_e2);
3706 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3709 /* Determines the cost of the computation by which USE is expressed
3710 from induction variable CAND. If ADDRESS_P is true, we just need
3711 to create an address from it, otherwise we want to get it into a
3712 register. A set of invariants we depend on is stored in
3713 DEPENDS_ON. AT is the statement at which the value is computed.
3714 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3715 addressing is likely. */
3717 static comp_cost
3718 get_computation_cost_at (struct ivopts_data *data,
3719 struct iv_use *use, struct iv_cand *cand,
3720 bool address_p, bitmap *depends_on, gimple at,
3721 bool *can_autoinc)
3723 tree ubase = use->iv->base, ustep = use->iv->step;
3724 tree cbase, cstep;
3725 tree utype = TREE_TYPE (ubase), ctype;
3726 unsigned HOST_WIDE_INT cstepi, offset = 0;
3727 HOST_WIDE_INT ratio, aratio;
3728 bool var_present, symbol_present, stmt_is_after_inc;
3729 comp_cost cost;
3730 double_int rat;
3731 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3733 *depends_on = NULL;
3735 /* Only consider real candidates. */
3736 if (!cand->iv)
3737 return infinite_cost;
3739 cbase = cand->iv->base;
3740 cstep = cand->iv->step;
3741 ctype = TREE_TYPE (cbase);
3743 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3745 /* We do not have a precision to express the values of use. */
3746 return infinite_cost;
3749 if (address_p)
3751 /* Do not try to express address of an object with computation based
3752 on address of a different object. This may cause problems in rtl
3753 level alias analysis (that does not expect this to be happening,
3754 as this is illegal in C), and would be unlikely to be useful
3755 anyway. */
3756 if (use->iv->base_object
3757 && cand->iv->base_object
3758 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3759 return infinite_cost;
3762 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3764 /* TODO -- add direct handling of this case. */
3765 goto fallback;
3768 /* CSTEPI is removed from the offset in case the statement is after the
3769 increment. If the step is not constant, we use zero instead.
3770 This is a bit imprecise (there is the extra addition), but
3771 redundancy elimination is likely to transform the code so that
3772 it uses the value of the variable before the increment anyway,
3773 so this is not all that unrealistic. */
3774 if (cst_and_fits_in_hwi (cstep))
3775 cstepi = int_cst_value (cstep);
3776 else
3777 cstepi = 0;
3779 if (!constant_multiple_of (ustep, cstep, &rat))
3780 return infinite_cost;
3782 if (double_int_fits_in_shwi_p (rat))
3783 ratio = double_int_to_shwi (rat);
3784 else
3785 return infinite_cost;
3787 STRIP_NOPS (cbase);
3788 ctype = TREE_TYPE (cbase);
3790 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3791 or ratio == 1, it is better to handle this like
3793 ubase - ratio * cbase + ratio * var
3795 (this also holds in the case ratio == -1, TODO). */
3797 if (cst_and_fits_in_hwi (cbase))
3799 offset = - ratio * int_cst_value (cbase);
3800 cost = difference_cost (data,
3801 ubase, build_int_cst (utype, 0),
3802 &symbol_present, &var_present, &offset,
3803 depends_on);
3805 else if (ratio == 1)
3807 cost = difference_cost (data,
3808 ubase, cbase,
3809 &symbol_present, &var_present, &offset,
3810 depends_on);
3812 else if (address_p
3813 && !POINTER_TYPE_P (ctype)
3814 && multiplier_allowed_in_address_p
3815 (ratio, TYPE_MODE (TREE_TYPE (utype)),
3816 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
3818 cbase
3819 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
3820 cost = difference_cost (data,
3821 ubase, cbase,
3822 &symbol_present, &var_present, &offset,
3823 depends_on);
3825 else
3827 cost = force_var_cost (data, cbase, depends_on);
3828 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3829 cost = add_costs (cost,
3830 difference_cost (data,
3831 ubase, build_int_cst (utype, 0),
3832 &symbol_present, &var_present,
3833 &offset, depends_on));
3836 /* If we are after the increment, the value of the candidate is higher by
3837 one iteration. */
3838 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
3839 if (stmt_is_after_inc)
3840 offset -= ratio * cstepi;
3842 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3843 (symbol/var1/const parts may be omitted). If we are looking for an
3844 address, find the cost of addressing this. */
3845 if (address_p)
3846 return add_costs (cost,
3847 get_address_cost (symbol_present, var_present,
3848 offset, ratio, cstepi,
3849 TYPE_MODE (TREE_TYPE (utype)),
3850 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
3851 speed, stmt_is_after_inc,
3852 can_autoinc));
3854 /* Otherwise estimate the costs for computing the expression. */
3855 if (!symbol_present && !var_present && !offset)
3857 if (ratio != 1)
3858 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3859 return cost;
3862 /* Symbol + offset should be compile-time computable so consider that they
3863 are added once to the variable, if present. */
3864 if (var_present && (symbol_present || offset))
3865 cost.cost += adjust_setup_cost (data,
3866 add_cost (TYPE_MODE (ctype), speed));
3868 /* Having offset does not affect runtime cost in case it is added to
3869 symbol, but it increases complexity. */
3870 if (offset)
3871 cost.complexity++;
3873 cost.cost += add_cost (TYPE_MODE (ctype), speed);
3875 aratio = ratio > 0 ? ratio : -ratio;
3876 if (aratio != 1)
3877 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3878 return cost;
3880 fallback:
3881 if (can_autoinc)
3882 *can_autoinc = false;
3885 /* Just get the expression, expand it and measure the cost. */
3886 tree comp = get_computation_at (data->current_loop, use, cand, at);
3888 if (!comp)
3889 return infinite_cost;
3891 if (address_p)
3892 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3894 return new_cost (computation_cost (comp, speed), 0);
3898 /* Determines the cost of the computation by which USE is expressed
3899 from induction variable CAND. If ADDRESS_P is true, we just need
3900 to create an address from it, otherwise we want to get it into a
3901 register. A set of invariants we depend on is stored in
3902 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
3903 autoinc addressing is likely. */
3905 static comp_cost
3906 get_computation_cost (struct ivopts_data *data,
3907 struct iv_use *use, struct iv_cand *cand,
3908 bool address_p, bitmap *depends_on, bool *can_autoinc)
3910 return get_computation_cost_at (data,
3911 use, cand, address_p, depends_on, use->stmt,
3912 can_autoinc);
3915 /* Determines cost of basing replacement of USE on CAND in a generic
3916 expression. */
3918 static bool
3919 determine_use_iv_cost_generic (struct ivopts_data *data,
3920 struct iv_use *use, struct iv_cand *cand)
3922 bitmap depends_on;
3923 comp_cost cost;
3925 /* The simple case first -- if we need to express value of the preserved
3926 original biv, the cost is 0. This also prevents us from counting the
3927 cost of increment twice -- once at this use and once in the cost of
3928 the candidate. */
3929 if (cand->pos == IP_ORIGINAL
3930 && cand->incremented_at == use->stmt)
3932 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3933 return true;
3936 cost = get_computation_cost (data, use, cand, false, &depends_on, NULL);
3937 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3939 return !infinite_cost_p (cost);
3942 /* Determines cost of basing replacement of USE on CAND in an address. */
3944 static bool
3945 determine_use_iv_cost_address (struct ivopts_data *data,
3946 struct iv_use *use, struct iv_cand *cand)
3948 bitmap depends_on;
3949 bool can_autoinc;
3950 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
3951 &can_autoinc);
3953 if (cand->ainc_use == use)
3955 if (can_autoinc)
3956 cost.cost -= cand->cost_step;
3957 /* If we generated the candidate solely for exploiting autoincrement
3958 opportunities, and it turns out it can't be used, set the cost to
3959 infinity to make sure we ignore it. */
3960 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
3961 cost = infinite_cost;
3963 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3965 return !infinite_cost_p (cost);
3968 /* Computes value of candidate CAND at position AT in iteration NITER, and
3969 stores it to VAL. */
3971 static void
3972 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3973 aff_tree *val)
3975 aff_tree step, delta, nit;
3976 struct iv *iv = cand->iv;
3977 tree type = TREE_TYPE (iv->base);
3978 tree steptype = type;
3979 if (POINTER_TYPE_P (type))
3980 steptype = sizetype;
3982 tree_to_aff_combination (iv->step, steptype, &step);
3983 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3984 aff_combination_convert (&nit, steptype);
3985 aff_combination_mult (&nit, &step, &delta);
3986 if (stmt_after_increment (loop, cand, at))
3987 aff_combination_add (&delta, &step);
3989 tree_to_aff_combination (iv->base, type, val);
3990 aff_combination_add (val, &delta);
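/* Worked example, illustrative only: for a candidate with base 100 and
   step 4, the value after NITER == 7 iterations is 100 + 7*4 == 128 at
   a statement before the increment; at a statement after the increment
   one more step has already been added, giving 132.  */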
3993 /* Returns the period of induction variable IV. */
3995 static tree
3996 iv_period (struct iv *iv)
3998 tree step = iv->step, period, type;
3999 tree pow2div;
4001 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4003 /* Period of the iv is gcd (step, type range). Since type range is power
4004 of two, it suffices to determine the maximum power of two that divides
4005 step. */
4006 pow2div = num_ending_zeros (step);
4007 type = unsigned_type_for (TREE_TYPE (step));
4009 period = build_low_bits_mask (type,
4010 (TYPE_PRECISION (type)
4011 - tree_low_cst (pow2div, 1)));
4013 return period;
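/* Worked example, illustrative only: for a 32-bit iv with step 12,
   num_ending_zeros is 2 (12 == 4 * 3), so the period is the mask of the
   low 32 - 2 == 30 bits, i.e. 2^30 - 1.  The iv runs through 2^30
   distinct successive values, so an exit test may be rewritten on it as
   long as the exit is taken after at most 2^30 - 1 iterations.  */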
4016 /* Returns the comparison operator used when eliminating the iv USE. */
4018 static enum tree_code
4019 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4021 struct loop *loop = data->current_loop;
4022 basic_block ex_bb;
4023 edge exit;
4025 ex_bb = gimple_bb (use->stmt);
4026 exit = EDGE_SUCC (ex_bb, 0);
4027 if (flow_bb_inside_loop_p (loop, exit->dest))
4028 exit = EDGE_SUCC (ex_bb, 1);
4030 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4033 /* Check whether it is possible to express the condition in USE by comparison
4034 of candidate CAND. If so, store the value we compare against in BOUND. */
4036 static bool
4037 may_eliminate_iv (struct ivopts_data *data,
4038 struct iv_use *use, struct iv_cand *cand, tree *bound)
4040 basic_block ex_bb;
4041 edge exit;
4042 tree nit, period;
4043 struct loop *loop = data->current_loop;
4044 aff_tree bnd;
4046 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4047 return false;
4049 /* For now, this works only for exits that dominate the loop latch.
4050 TODO: extend to other conditions inside loop body. */
4051 ex_bb = gimple_bb (use->stmt);
4052 if (use->stmt != last_stmt (ex_bb)
4053 || gimple_code (use->stmt) != GIMPLE_COND
4054 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4055 return false;
4057 exit = EDGE_SUCC (ex_bb, 0);
4058 if (flow_bb_inside_loop_p (loop, exit->dest))
4059 exit = EDGE_SUCC (ex_bb, 1);
4060 if (flow_bb_inside_loop_p (loop, exit->dest))
4061 return false;
4063 nit = niter_for_exit (data, exit);
4064 if (!nit)
4065 return false;
4067 /* Determine whether we can use the variable to test the exit condition.
4068 This is the case iff the period of the induction variable is greater
4069 than the number of iterations for which the exit condition is true. */
4070 period = iv_period (cand->iv);
4072 /* If the number of iterations is constant, compare against it directly. */
4073 if (TREE_CODE (nit) == INTEGER_CST)
4075 if (!tree_int_cst_lt (nit, period))
4076 return false;
4079 /* If not, and if this is the only possible exit of the loop, see whether
4080 we can get a conservative estimate on the number of iterations of the
4081 entire loop and compare against that instead. */
4082 else if (loop_only_exit_p (loop, exit))
4084 double_int period_value, max_niter;
4085 if (!estimated_loop_iterations (loop, true, &max_niter))
4086 return false;
4087 period_value = tree_to_double_int (period);
4088 if (double_int_ucmp (max_niter, period_value) >= 0)
4089 return false;
4092 /* Otherwise, punt. */
4093 else
4094 return false;
4096 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4098 *bound = aff_combination_to_tree (&bnd);
4099 /* It is unlikely that computing the number of iterations using division
4100 would be more profitable than keeping the original induction variable. */
4101 if (expression_expensive_p (*bound))
4102 return false;
4103 return true;
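/* Illustrative only, assuming 4-byte array elements: in

     for (i = 0; i < n; i++)
       a[i] = 0;

   a candidate p == &a[0] + 4*i lets cand_value_at compute the value p
   has when the exit is taken, so the test i < n can be replaced by
   p != &a[0] + 4*n and i itself can be eliminated, provided the period
   check above rules out p wrapping around first.  */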
4106 /* Determines cost of basing replacement of USE on CAND in a condition. */
4108 static bool
4109 determine_use_iv_cost_condition (struct ivopts_data *data,
4110 struct iv_use *use, struct iv_cand *cand)
4112 tree bound = NULL_TREE;
4113 struct iv *cmp_iv;
4114 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4115 comp_cost elim_cost, express_cost, cost;
4116 bool ok;
4117 tree *control_var, *bound_cst;
4119 /* Only consider real candidates. */
4120 if (!cand->iv)
4122 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
4123 return false;
4126 /* Try iv elimination. */
4127 if (may_eliminate_iv (data, use, cand, &bound))
4129 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4130 /* The bound is a loop invariant, so it will be only computed
4131 once. */
4132 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4134 else
4135 elim_cost = infinite_cost;
4137 /* Try expressing the original giv. If it is compared with an invariant,
4138 note that we cannot get rid of it. */
4139 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4140 NULL, &cmp_iv);
4141 gcc_assert (ok);
4143 /* When the condition is a comparison of the candidate IV against
4144 zero, prefer this IV.
4146 TODO: The constant that we're subtracting from the cost should
4147 be target-dependent. This information should be added to the
4148 target costs for each backend. */
4149 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4150 && integer_zerop (*bound_cst)
4151 && (operand_equal_p (*control_var, cand->var_after, 0)
4152 || operand_equal_p (*control_var, cand->var_before, 0)))
4153 elim_cost.cost -= 1;
4155 express_cost = get_computation_cost (data, use, cand, false,
4156 &depends_on_express, NULL);
4157 fd_ivopts_data = data;
4158 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4160 /* Choose the better approach, preferring the eliminated IV. */
4161 if (compare_costs (elim_cost, express_cost) <= 0)
4163 cost = elim_cost;
4164 depends_on = depends_on_elim;
4165 depends_on_elim = NULL;
4167 else
4169 cost = express_cost;
4170 depends_on = depends_on_express;
4171 depends_on_express = NULL;
4172 bound = NULL_TREE;
4175 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4177 if (depends_on_elim)
4178 BITMAP_FREE (depends_on_elim);
4179 if (depends_on_express)
4180 BITMAP_FREE (depends_on_express);
4182 return !infinite_cost_p (cost);
4185 /* Determines cost of basing replacement of USE on CAND. Returns false
4186 if USE cannot be based on CAND. */
4188 static bool
4189 determine_use_iv_cost (struct ivopts_data *data,
4190 struct iv_use *use, struct iv_cand *cand)
4192 switch (use->type)
4194 case USE_NONLINEAR_EXPR:
4195 return determine_use_iv_cost_generic (data, use, cand);
4197 case USE_ADDRESS:
4198 return determine_use_iv_cost_address (data, use, cand);
4200 case USE_COMPARE:
4201 return determine_use_iv_cost_condition (data, use, cand);
4203 default:
4204 gcc_unreachable ();
4208 /* Return true if get_computation_cost indicates that autoincrement is
4209 a possibility for the pair of USE and CAND, false otherwise. */
4211 static bool
4212 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4213 struct iv_cand *cand)
4215 bitmap depends_on;
4216 bool can_autoinc;
4217 comp_cost cost;
4219 if (use->type != USE_ADDRESS)
4220 return false;
4222 cost = get_computation_cost (data, use, cand, true, &depends_on,
4223 &can_autoinc);
4225 BITMAP_FREE (depends_on);
4227 return !infinite_cost_p (cost) && can_autoinc;
4230 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4231 use that allows autoincrement, and set their AINC_USE if possible. */
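   /* Illustrative sketch (added, not from the original sources): for a pair
      such as

	*p = x;		<- USE_ADDRESS use of p
	p = p + 4;	<- IP_ORIGINAL candidate incremented just after

      a target with post-increment addressing can fold both statements into
      a single auto-incrementing store; AINC_USE records the use that makes
      this possible.  */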
4233 static void
4234 set_autoinc_for_original_candidates (struct ivopts_data *data)
4236 unsigned i, j;
4238 for (i = 0; i < n_iv_cands (data); i++)
4240 struct iv_cand *cand = iv_cand (data, i);
4241 struct iv_use *closest = NULL;
4242 if (cand->pos != IP_ORIGINAL)
4243 continue;
4244 for (j = 0; j < n_iv_uses (data); j++)
4246 struct iv_use *use = iv_use (data, j);
4247 unsigned uid = gimple_uid (use->stmt);
4248 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4249 || uid > gimple_uid (cand->incremented_at))
4250 continue;
4251 if (closest == NULL || uid > gimple_uid (closest->stmt))
4252 closest = use;
4254 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4255 continue;
4256 cand->ainc_use = closest;
4260 /* Finds the candidates for the induction variables. */
4262 static void
4263 find_iv_candidates (struct ivopts_data *data)
4265 /* Add commonly used ivs. */
4266 add_standard_iv_candidates (data);
4268 /* Add old induction variables. */
4269 add_old_ivs_candidates (data);
4271 /* Add induction variables derived from uses. */
4272 add_derived_ivs_candidates (data);
4274 set_autoinc_for_original_candidates (data);
4276 /* Record the important candidates. */
4277 record_important_candidates (data);
4280 /* Determines costs of basing the use of the iv on an iv candidate. */
4282 static void
4283 determine_use_iv_costs (struct ivopts_data *data)
4285 unsigned i, j;
4286 struct iv_use *use;
4287 struct iv_cand *cand;
4288 bitmap to_clear = BITMAP_ALLOC (NULL);
4290 alloc_use_cost_map (data);
4292 for (i = 0; i < n_iv_uses (data); i++)
4294 use = iv_use (data, i);
4296 if (data->consider_all_candidates)
4298 for (j = 0; j < n_iv_cands (data); j++)
4300 cand = iv_cand (data, j);
4301 determine_use_iv_cost (data, use, cand);
4304 else
4306 bitmap_iterator bi;
4308 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4310 cand = iv_cand (data, j);
4311 if (!determine_use_iv_cost (data, use, cand))
4312 bitmap_set_bit (to_clear, j);
4315 /* Remove the candidates for which the cost is infinite from
4316 the list of related candidates. */
4317 bitmap_and_compl_into (use->related_cands, to_clear);
4318 bitmap_clear (to_clear);
4322 BITMAP_FREE (to_clear);
4324 if (dump_file && (dump_flags & TDF_DETAILS))
4326 fprintf (dump_file, "Use-candidate costs:\n");
4328 for (i = 0; i < n_iv_uses (data); i++)
4330 use = iv_use (data, i);
4332 fprintf (dump_file, "Use %d:\n", i);
4333 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4334 for (j = 0; j < use->n_map_members; j++)
4336 if (!use->cost_map[j].cand
4337 || infinite_cost_p (use->cost_map[j].cost))
4338 continue;
4340 fprintf (dump_file, " %d\t%d\t%d\t",
4341 use->cost_map[j].cand->id,
4342 use->cost_map[j].cost.cost,
4343 use->cost_map[j].cost.complexity);
4344 if (use->cost_map[j].depends_on)
4345 bitmap_print (dump_file,
4346 use->cost_map[j].depends_on, "","");
4347 fprintf (dump_file, "\n");
4350 fprintf (dump_file, "\n");
4352 fprintf (dump_file, "\n");
4356 /* Determines cost of the candidate CAND. */
4358 static void
4359 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4361 comp_cost cost_base;
4362 unsigned cost, cost_step;
4363 tree base;
4365 if (!cand->iv)
4367 cand->cost = 0;
4368 return;
4371 /* There are two costs associated with the candidate -- its increment
4372 and its initialization. The second is almost negligible for any loop
4373 that rolls enough, so we give it only a small weight. */
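   /* Informal example (illustrative numbers, added for clarity): if the
      increment costs 1 add per iteration and computing the base costs 4,
      adjust_setup_cost amortizes the setup part over the expected number
      of iterations, so for a loop that rolls enough the resulting cost is
      dominated by the per-iteration add.  */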
4375 base = cand->iv->base;
4376 cost_base = force_var_cost (data, base, NULL);
4377 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4379 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4381 /* Prefer the original ivs unless we may gain something by replacing them;
4382 the reason is to make debugging simpler. This is not relevant for
4383 artificial ivs created by other optimization passes. */
4384 if (cand->pos != IP_ORIGINAL
4385 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4386 cost++;
4388 /* Prefer not to insert statements into the latch unless there are some
4389 already (so that we do not create unnecessary jumps). */
4390 if (cand->pos == IP_END
4391 && empty_block_p (ip_end_pos (data->current_loop)))
4392 cost++;
4394 cand->cost = cost;
4395 cand->cost_step = cost_step;
4398 /* Determines costs of computation of the candidates. */
4400 static void
4401 determine_iv_costs (struct ivopts_data *data)
4403 unsigned i;
4405 if (dump_file && (dump_flags & TDF_DETAILS))
4407 fprintf (dump_file, "Candidate costs:\n");
4408 fprintf (dump_file, " cand\tcost\n");
4411 for (i = 0; i < n_iv_cands (data); i++)
4413 struct iv_cand *cand = iv_cand (data, i);
4415 determine_iv_cost (data, cand);
4417 if (dump_file && (dump_flags & TDF_DETAILS))
4418 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4421 if (dump_file && (dump_flags & TDF_DETAILS))
4422 fprintf (dump_file, "\n");
4425 /* Calculates cost for having SIZE induction variables. */
4427 static unsigned
4428 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4430 /* We add size to the cost, so that we prefer eliminating ivs
4431 if possible. */
4432 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
4435 /* For each size of the induction variable set determine the penalty. */
4437 static void
4438 determine_set_costs (struct ivopts_data *data)
4440 unsigned j, n;
4441 gimple phi;
4442 gimple_stmt_iterator psi;
4443 tree op;
4444 struct loop *loop = data->current_loop;
4445 bitmap_iterator bi;
4447 /* We use the following model (definitely improvable, especially the
4448 cost function -- TODO):
4450 We estimate the number of registers available (using MD data), name it A.
4452 We estimate the number of registers used by the loop, name it U. This
4453 number is obtained as the number of loop phi nodes (not counting virtual
4454 registers and bivs) + the number of variables from outside of the loop.
4456 We set a reserve R (free regs that are used for temporary computations,
4457 etc.). For now the reserve is a constant 3.
4459 Let I be the number of induction variables.
4461 -- if U + I + R <= A, the cost is I * SMALL_COST (just so as not to
4462 encourage making a lot of ivs without a reason).
4463 -- if A - R < U + I <= A, the cost is I * PRES_COST
4464 -- if U + I > A, the cost is I * PRES_COST and
4465 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
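   /* Worked example (illustrative numbers, added for clarity): with A = 16,
      R = 3 and U = 10, choosing I = 2 gives U + I + R = 15 <= A, so the
      cost is 2 * SMALL_COST; choosing I = 5 gives A - R = 13 < U + I = 15
      <= A, so the cost is 5 * PRES_COST; choosing I = 8 gives
      U + I = 18 > A, so a spill term proportional to
      (U + I - A) / (U + I) is added on top of 8 * PRES_COST.  */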
4467 if (dump_file && (dump_flags & TDF_DETAILS))
4469 fprintf (dump_file, "Global costs:\n");
4470 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4471 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4472 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4475 n = 0;
4476 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4478 phi = gsi_stmt (psi);
4479 op = PHI_RESULT (phi);
4481 if (!is_gimple_reg (op))
4482 continue;
4484 if (get_iv (data, op))
4485 continue;
4487 n++;
4490 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4492 struct version_info *info = ver_info (data, j);
4494 if (info->inv_id && info->has_nonlin_use)
4495 n++;
4498 data->regs_used = n;
4499 if (dump_file && (dump_flags & TDF_DETAILS))
4500 fprintf (dump_file, " regs_used %d\n", n);
4502 if (dump_file && (dump_flags & TDF_DETAILS))
4504 fprintf (dump_file, " cost for size:\n");
4505 fprintf (dump_file, " ivs\tcost\n");
4506 for (j = 0; j <= 2 * target_avail_regs; j++)
4507 fprintf (dump_file, " %d\t%d\n", j,
4508 ivopts_global_cost_for_size (data, j));
4509 fprintf (dump_file, "\n");
4513 /* Returns true if A is a cheaper cost pair than B. */
4515 static bool
4516 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4518 int cmp;
4520 if (!a)
4521 return false;
4523 if (!b)
4524 return true;
4526 cmp = compare_costs (a->cost, b->cost);
4527 if (cmp < 0)
4528 return true;
4530 if (cmp > 0)
4531 return false;
4533 /* In case the costs are the same, prefer the cheaper candidate. */
4534 if (a->cand->cost < b->cand->cost)
4535 return true;
4537 return false;
4540 /* Computes the cost field of IVS structure. */
4542 static void
4543 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4545 comp_cost cost = ivs->cand_use_cost;
4546 cost.cost += ivs->cand_cost;
4547 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4549 ivs->cost = cost;
4552 /* Remove the invariants in set INVS from set IVS. */
4554 static void
4555 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4557 bitmap_iterator bi;
4558 unsigned iid;
4560 if (!invs)
4561 return;
4563 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4565 ivs->n_invariant_uses[iid]--;
4566 if (ivs->n_invariant_uses[iid] == 0)
4567 ivs->n_regs--;
4571 /* Set USE not to be expressed by any candidate in IVS. */
4573 static void
4574 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4575 struct iv_use *use)
4577 unsigned uid = use->id, cid;
4578 struct cost_pair *cp;
4580 cp = ivs->cand_for_use[uid];
4581 if (!cp)
4582 return;
4583 cid = cp->cand->id;
4585 ivs->bad_uses++;
4586 ivs->cand_for_use[uid] = NULL;
4587 ivs->n_cand_uses[cid]--;
4589 if (ivs->n_cand_uses[cid] == 0)
4591 bitmap_clear_bit (ivs->cands, cid);
4592 /* Do not count the pseudocandidates. */
4593 if (cp->cand->iv)
4594 ivs->n_regs--;
4595 ivs->n_cands--;
4596 ivs->cand_cost -= cp->cand->cost;
4598 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4601 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4603 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4604 iv_ca_recount_cost (data, ivs);
4607 /* Add invariants in set INVS to set IVS. */
4609 static void
4610 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4612 bitmap_iterator bi;
4613 unsigned iid;
4615 if (!invs)
4616 return;
4618 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4620 ivs->n_invariant_uses[iid]++;
4621 if (ivs->n_invariant_uses[iid] == 1)
4622 ivs->n_regs++;
4626 /* Set cost pair for USE in set IVS to CP. */
4628 static void
4629 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4630 struct iv_use *use, struct cost_pair *cp)
4632 unsigned uid = use->id, cid;
4634 if (ivs->cand_for_use[uid] == cp)
4635 return;
4637 if (ivs->cand_for_use[uid])
4638 iv_ca_set_no_cp (data, ivs, use);
4640 if (cp)
4642 cid = cp->cand->id;
4644 ivs->bad_uses--;
4645 ivs->cand_for_use[uid] = cp;
4646 ivs->n_cand_uses[cid]++;
4647 if (ivs->n_cand_uses[cid] == 1)
4649 bitmap_set_bit (ivs->cands, cid);
4650 /* Do not count the pseudocandidates. */
4651 if (cp->cand->iv)
4652 ivs->n_regs++;
4653 ivs->n_cands++;
4654 ivs->cand_cost += cp->cand->cost;
4656 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4659 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4660 iv_ca_set_add_invariants (ivs, cp->depends_on);
4661 iv_ca_recount_cost (data, ivs);
4665 /* Extend set IVS by expressing USE by some of the candidates in it
4666 if possible. */
4668 static void
4669 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4670 struct iv_use *use)
4672 struct cost_pair *best_cp = NULL, *cp;
4673 bitmap_iterator bi;
4674 unsigned i;
4676 gcc_assert (ivs->upto >= use->id);
4678 if (ivs->upto == use->id)
4680 ivs->upto++;
4681 ivs->bad_uses++;
4684 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4686 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4688 if (cheaper_cost_pair (cp, best_cp))
4689 best_cp = cp;
4692 iv_ca_set_cp (data, ivs, use, best_cp);
4695 /* Get cost for assignment IVS. */
4697 static comp_cost
4698 iv_ca_cost (struct iv_ca *ivs)
4700 /* This was a conditional expression but it triggered a bug in
4701 Sun C 5.5. */
4702 if (ivs->bad_uses)
4703 return infinite_cost;
4704 else
4705 return ivs->cost;
4708 /* Returns true if all dependences of CP are among invariants in IVS. */
4710 static bool
4711 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4713 unsigned i;
4714 bitmap_iterator bi;
4716 if (!cp->depends_on)
4717 return true;
4719 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4721 if (ivs->n_invariant_uses[i] == 0)
4722 return false;
4725 return true;
4728 /* Creates a record of the change of expressing USE by NEW_CP instead of
4729 OLD_CP and chains it before NEXT_CHANGE. */
4731 static struct iv_ca_delta *
4732 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4733 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4735 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4737 change->use = use;
4738 change->old_cp = old_cp;
4739 change->new_cp = new_cp;
4740 change->next_change = next_change;
4742 return change;
4745 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4746 are rewritten. */
4748 static struct iv_ca_delta *
4749 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4751 struct iv_ca_delta *last;
4753 if (!l2)
4754 return l1;
4756 if (!l1)
4757 return l2;
4759 for (last = l1; last->next_change; last = last->next_change)
4760 continue;
4761 last->next_change = l2;
4763 return l1;
4766 /* Returns the cost pair by which USE is expressed in IVS. */
4768 static struct cost_pair *
4769 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4771 return ivs->cand_for_use[use->id];
4774 /* Reverse the list of changes DELTA, forming its inverse. */
4776 static struct iv_ca_delta *
4777 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4779 struct iv_ca_delta *act, *next, *prev = NULL;
4780 struct cost_pair *tmp;
4782 for (act = delta; act; act = next)
4784 next = act->next_change;
4785 act->next_change = prev;
4786 prev = act;
4788 tmp = act->old_cp;
4789 act->old_cp = act->new_cp;
4790 act->new_cp = tmp;
4793 return prev;
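   /* Note (descriptive comment added for clarity): reversing the list while
      swapping each old_cp/new_cp pair yields a delta that undoes the
      original one; iv_ca_delta_commit relies on this when FORWARD is
      false.  */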
4796 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4797 reverted instead. */
4799 static void
4800 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4801 struct iv_ca_delta *delta, bool forward)
4803 struct cost_pair *from, *to;
4804 struct iv_ca_delta *act;
4806 if (!forward)
4807 delta = iv_ca_delta_reverse (delta);
4809 for (act = delta; act; act = act->next_change)
4811 from = act->old_cp;
4812 to = act->new_cp;
4813 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4814 iv_ca_set_cp (data, ivs, act->use, to);
4817 if (!forward)
4818 iv_ca_delta_reverse (delta);
4821 /* Returns true if CAND is used in IVS. */
4823 static bool
4824 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4826 return ivs->n_cand_uses[cand->id] > 0;
4829 /* Returns number of induction variable candidates in the set IVS. */
4831 static unsigned
4832 iv_ca_n_cands (struct iv_ca *ivs)
4834 return ivs->n_cands;
4837 /* Free the list of changes DELTA. */
4839 static void
4840 iv_ca_delta_free (struct iv_ca_delta **delta)
4842 struct iv_ca_delta *act, *next;
4844 for (act = *delta; act; act = next)
4846 next = act->next_change;
4847 free (act);
4850 *delta = NULL;
4853 /* Allocates a new iv candidate assignment. */
4855 static struct iv_ca *
4856 iv_ca_new (struct ivopts_data *data)
4858 struct iv_ca *nw = XNEW (struct iv_ca);
4860 nw->upto = 0;
4861 nw->bad_uses = 0;
4862 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4863 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4864 nw->cands = BITMAP_ALLOC (NULL);
4865 nw->n_cands = 0;
4866 nw->n_regs = 0;
4867 nw->cand_use_cost = zero_cost;
4868 nw->cand_cost = 0;
4869 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4870 nw->cost = zero_cost;
4872 return nw;
4875 /* Free memory occupied by the set IVS. */
4877 static void
4878 iv_ca_free (struct iv_ca **ivs)
4880 free ((*ivs)->cand_for_use);
4881 free ((*ivs)->n_cand_uses);
4882 BITMAP_FREE ((*ivs)->cands);
4883 free ((*ivs)->n_invariant_uses);
4884 free (*ivs);
4885 *ivs = NULL;
4888 /* Dumps IVS to FILE. */
4890 static void
4891 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4893 const char *pref = " invariants ";
4894 unsigned i;
4895 comp_cost cost = iv_ca_cost (ivs);
4897 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
4898 bitmap_print (file, ivs->cands, " candidates ","\n");
4900 for (i = 1; i <= data->max_inv_id; i++)
4901 if (ivs->n_invariant_uses[i])
4903 fprintf (file, "%s%d", pref, i);
4904 pref = ", ";
4906 fprintf (file, "\n");
4909 /* Try changing the candidate in IVS to CAND for each use. Return the cost
4910 of the new set, and store the differences in DELTA. The number of
4911 induction variables in the new set is stored to N_IVS. */
4913 static comp_cost
4914 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4915 struct iv_cand *cand, struct iv_ca_delta **delta,
4916 unsigned *n_ivs)
4918 unsigned i;
4919 comp_cost cost;
4920 struct iv_use *use;
4921 struct cost_pair *old_cp, *new_cp;
4923 *delta = NULL;
4924 for (i = 0; i < ivs->upto; i++)
4926 use = iv_use (data, i);
4927 old_cp = iv_ca_cand_for_use (ivs, use);
4929 if (old_cp
4930 && old_cp->cand == cand)
4931 continue;
4933 new_cp = get_use_iv_cost (data, use, cand);
4934 if (!new_cp)
4935 continue;
4937 if (!iv_ca_has_deps (ivs, new_cp))
4938 continue;
4940 if (!cheaper_cost_pair (new_cp, old_cp))
4941 continue;
4943 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4946 iv_ca_delta_commit (data, ivs, *delta, true);
4947 cost = iv_ca_cost (ivs);
4948 if (n_ivs)
4949 *n_ivs = iv_ca_n_cands (ivs);
4950 iv_ca_delta_commit (data, ivs, *delta, false);
4952 return cost;
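   /* Note (descriptive comment added for clarity): the commit/measure/revert
      sequence above evaluates the cost of DELTA without permanently changing
      IVS; the caller inspects the returned cost and decides separately
      whether to commit DELTA for real.  */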
4955 /* Try narrowing set IVS by removing CAND. Return the cost of
4956 the new set and store the differences in DELTA. */
4958 static comp_cost
4959 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4960 struct iv_cand *cand, struct iv_ca_delta **delta)
4962 unsigned i, ci;
4963 struct iv_use *use;
4964 struct cost_pair *old_cp, *new_cp, *cp;
4965 bitmap_iterator bi;
4966 struct iv_cand *cnd;
4967 comp_cost cost;
4969 *delta = NULL;
4970 for (i = 0; i < n_iv_uses (data); i++)
4972 use = iv_use (data, i);
4974 old_cp = iv_ca_cand_for_use (ivs, use);
4975 if (old_cp->cand != cand)
4976 continue;
4978 new_cp = NULL;
4980 if (data->consider_all_candidates)
4982 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4984 if (ci == cand->id)
4985 continue;
4987 cnd = iv_cand (data, ci);
4989 cp = get_use_iv_cost (data, use, cnd);
4990 if (!cp)
4991 continue;
4992 if (!iv_ca_has_deps (ivs, cp))
4993 continue;
4995 if (!cheaper_cost_pair (cp, new_cp))
4996 continue;
4998 new_cp = cp;
5001 else
5003 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5005 if (ci == cand->id)
5006 continue;
5008 cnd = iv_cand (data, ci);
5010 cp = get_use_iv_cost (data, use, cnd);
5011 if (!cp)
5012 continue;
5013 if (!iv_ca_has_deps (ivs, cp))
5014 continue;
5016 if (!cheaper_cost_pair (cp, new_cp))
5017 continue;
5019 new_cp = cp;
5023 if (!new_cp)
5025 iv_ca_delta_free (delta);
5026 return infinite_cost;
5029 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5032 iv_ca_delta_commit (data, ivs, *delta, true);
5033 cost = iv_ca_cost (ivs);
5034 iv_ca_delta_commit (data, ivs, *delta, false);
5036 return cost;
5039 /* Try optimizing the set of candidates IVS by removing candidates other
5040 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5041 differences in DELTA. */
5043 static comp_cost
5044 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5045 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5047 bitmap_iterator bi;
5048 struct iv_ca_delta *act_delta, *best_delta;
5049 unsigned i;
5050 comp_cost best_cost, acost;
5051 struct iv_cand *cand;
5053 best_delta = NULL;
5054 best_cost = iv_ca_cost (ivs);
5056 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5058 cand = iv_cand (data, i);
5060 if (cand == except_cand)
5061 continue;
5063 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5065 if (compare_costs (acost, best_cost) < 0)
5067 best_cost = acost;
5068 iv_ca_delta_free (&best_delta);
5069 best_delta = act_delta;
5071 else
5072 iv_ca_delta_free (&act_delta);
5075 if (!best_delta)
5077 *delta = NULL;
5078 return best_cost;
5081 /* Recurse to possibly remove other unnecessary ivs. */
5082 iv_ca_delta_commit (data, ivs, best_delta, true);
5083 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5084 iv_ca_delta_commit (data, ivs, best_delta, false);
5085 *delta = iv_ca_delta_join (best_delta, *delta);
5086 return best_cost;
5089 /* Tries to extend the set IVS in the best possible way in order
5090 to express USE. */
5092 static bool
5093 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5094 struct iv_use *use)
5096 comp_cost best_cost, act_cost;
5097 unsigned i;
5098 bitmap_iterator bi;
5099 struct iv_cand *cand;
5100 struct iv_ca_delta *best_delta = NULL, *act_delta;
5101 struct cost_pair *cp;
5103 iv_ca_add_use (data, ivs, use);
5104 best_cost = iv_ca_cost (ivs);
5106 cp = iv_ca_cand_for_use (ivs, use);
5107 if (cp)
5109 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5110 iv_ca_set_no_cp (data, ivs, use);
5113 /* First try important candidates not based on any memory object. Only if
5114 this fails do we try the specific ones. Rationale -- in loops with many
5115 variables the best choice is often to use just one generic biv. If we
5116 added many use-specific ivs here, the optimization algorithm would later
5117 be likely to get stuck in a local minimum, thus causing us to create
5118 too many ivs. The approach from few ivs to more seems more likely to be
5119 successful -- starting from few ivs, replacing an expensive use by a
5120 specific iv should always be a win. */
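   /* Illustrative example (added, not from the original sources): in a loop
      walking two arrays a[i] and b[i] with an exit test i < n, a single
      generic biv i can usually serve all three uses; seeding the set with
      one address iv per array instead tends to trap the search in that
      larger local minimum.  */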
5121 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5123 cand = iv_cand (data, i);
5125 if (cand->iv->base_object != NULL_TREE)
5126 continue;
5128 if (iv_ca_cand_used_p (ivs, cand))
5129 continue;
5131 cp = get_use_iv_cost (data, use, cand);
5132 if (!cp)
5133 continue;
5135 iv_ca_set_cp (data, ivs, use, cp);
5136 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5137 iv_ca_set_no_cp (data, ivs, use);
5138 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5140 if (compare_costs (act_cost, best_cost) < 0)
5142 best_cost = act_cost;
5144 iv_ca_delta_free (&best_delta);
5145 best_delta = act_delta;
5147 else
5148 iv_ca_delta_free (&act_delta);
5151 if (infinite_cost_p (best_cost))
5153 for (i = 0; i < use->n_map_members; i++)
5155 cp = use->cost_map + i;
5156 cand = cp->cand;
5157 if (!cand)
5158 continue;
5160 /* Already tried this. */
5161 if (cand->important && cand->iv->base_object == NULL_TREE)
5162 continue;
5164 if (iv_ca_cand_used_p (ivs, cand))
5165 continue;
5167 act_delta = NULL;
5168 iv_ca_set_cp (data, ivs, use, cp);
5169 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5170 iv_ca_set_no_cp (data, ivs, use);
5171 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5172 cp, act_delta);
5174 if (compare_costs (act_cost, best_cost) < 0)
5176 best_cost = act_cost;
5178 if (best_delta)
5179 iv_ca_delta_free (&best_delta);
5180 best_delta = act_delta;
5182 else
5183 iv_ca_delta_free (&act_delta);
5187 iv_ca_delta_commit (data, ivs, best_delta, true);
5188 iv_ca_delta_free (&best_delta);
5190 return !infinite_cost_p (best_cost);
5193 /* Finds an initial assignment of candidates to uses. */
5195 static struct iv_ca *
5196 get_initial_solution (struct ivopts_data *data)
5198 struct iv_ca *ivs = iv_ca_new (data);
5199 unsigned i;
5201 for (i = 0; i < n_iv_uses (data); i++)
5202 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5204 iv_ca_free (&ivs);
5205 return NULL;
5208 return ivs;
5211 /* Tries to improve the set of induction variables IVS. */
5213 static bool
5214 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5216 unsigned i, n_ivs;
5217 comp_cost acost, best_cost = iv_ca_cost (ivs);
5218 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5219 struct iv_cand *cand;
5221 /* Try extending the set of induction variables by one. */
5222 for (i = 0; i < n_iv_cands (data); i++)
5224 cand = iv_cand (data, i);
5226 if (iv_ca_cand_used_p (ivs, cand))
5227 continue;
5229 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5230 if (!act_delta)
5231 continue;
5233 /* If we successfully added the candidate and the set is small enough,
5234 try optimizing it by removing other candidates. */
5235 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5237 iv_ca_delta_commit (data, ivs, act_delta, true);
5238 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5239 iv_ca_delta_commit (data, ivs, act_delta, false);
5240 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5243 if (compare_costs (acost, best_cost) < 0)
5245 best_cost = acost;
5246 iv_ca_delta_free (&best_delta);
5247 best_delta = act_delta;
5249 else
5250 iv_ca_delta_free (&act_delta);
5253 if (!best_delta)
5255 /* Try removing the candidates from the set instead. */
5256 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5258 /* Nothing more we can do. */
5259 if (!best_delta)
5260 return false;
5263 iv_ca_delta_commit (data, ivs, best_delta, true);
5264 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5265 iv_ca_delta_free (&best_delta);
5266 return true;
5269 /* Attempts to find the optimal set of induction variables. We use a simple
5270 greedy heuristic -- we try to replace at most one candidate in the selected
5271 solution and remove the unused ivs while this improves the cost. */
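   /* Informal sketch of the search (added for clarity; it mirrors the code
      below, with no additional assumptions):

	set = get_initial_solution (data);
	while (try_improve_iv_set (data, set))
	  ;

      Each improvement step commits the cheapest single change found --
      extending the set by one candidate (followed by pruning) or pruning
      alone -- and the loop stops once no change lowers iv_ca_cost (SET).  */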
5273 static struct iv_ca *
5274 find_optimal_iv_set (struct ivopts_data *data)
5276 unsigned i;
5277 struct iv_ca *set;
5278 struct iv_use *use;
5280 /* Get the initial solution. */
5281 set = get_initial_solution (data);
5282 if (!set)
5284 if (dump_file && (dump_flags & TDF_DETAILS))
5285 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5286 return NULL;
5289 if (dump_file && (dump_flags & TDF_DETAILS))
5291 fprintf (dump_file, "Initial set of candidates:\n");
5292 iv_ca_dump (data, dump_file, set);
5295 while (try_improve_iv_set (data, set))
5297 if (dump_file && (dump_flags & TDF_DETAILS))
5299 fprintf (dump_file, "Improved to:\n");
5300 iv_ca_dump (data, dump_file, set);
5304 if (dump_file && (dump_flags & TDF_DETAILS))
5306 comp_cost cost = iv_ca_cost (set);
5307 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
5310 for (i = 0; i < n_iv_uses (data); i++)
5312 use = iv_use (data, i);
5313 use->selected = iv_ca_cand_for_use (set, use)->cand;
5316 return set;
5319 /* Creates a new induction variable corresponding to CAND. */
5321 static void
5322 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5324 gimple_stmt_iterator incr_pos;
5325 tree base;
5326 bool after = false;
5328 if (!cand->iv)
5329 return;
5331 switch (cand->pos)
5333 case IP_NORMAL:
5334 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5335 break;
5337 case IP_END:
5338 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5339 after = true;
5340 break;
5342 case IP_AFTER_USE:
5343 after = true;
5344 /* fall through */
5345 case IP_BEFORE_USE:
5346 incr_pos = gsi_for_stmt (cand->incremented_at);
5347 break;
5349 case IP_ORIGINAL:
5350 /* Mark that the iv is preserved. */
5351 name_info (data, cand->var_before)->preserve_biv = true;
5352 name_info (data, cand->var_after)->preserve_biv = true;
5354 /* Rewrite the increment so that it uses var_before directly. */
5355 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5357 return;
5360 gimple_add_tmp_var (cand->var_before);
5361 add_referenced_var (cand->var_before);
5363 base = unshare_expr (cand->iv->base);
5365 create_iv (base, unshare_expr (cand->iv->step),
5366 cand->var_before, data->current_loop,
5367 &incr_pos, after, &cand->var_before, &cand->var_after);
5370 /* Creates new induction variables described in SET. */
5372 static void
5373 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5375 unsigned i;
5376 struct iv_cand *cand;
5377 bitmap_iterator bi;
5379 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5381 cand = iv_cand (data, i);
5382 create_new_iv (data, cand);
5387 /* Rewrites USE (definition of iv used in a nonlinear expression)
5388 using candidate CAND. */
5390 static void
5391 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5392 struct iv_use *use, struct iv_cand *cand)
5394 tree comp;
5395 tree op, tgt;
5396 gimple ass;
5397 gimple_stmt_iterator bsi;
5399 /* An important special case -- if we are asked to express the value of
5400 the original iv by itself, just exit; there is no need to
5401 introduce a new computation (that might also need casting the
5402 variable to unsigned and back). */
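   /* Illustrative example (added, not from the original sources): if
      USE->stmt is the original increment i_4 = i_1 + 1 and CAND is that
      same biv, we keep the statement as it is; rewriting it through
      get_computation would only produce an equivalent but uglier form such
      as i_4 = (int) ((unsigned) i_1 + 1).  */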
5403 if (cand->pos == IP_ORIGINAL
5404 && cand->incremented_at == use->stmt)
5406 tree step, ctype, utype;
5407 enum tree_code incr_code = PLUS_EXPR, old_code;
5409 gcc_assert (is_gimple_assign (use->stmt));
5410 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5412 step = cand->iv->step;
5413 ctype = TREE_TYPE (step);
5414 utype = TREE_TYPE (cand->var_after);
5415 if (TREE_CODE (step) == NEGATE_EXPR)
5417 incr_code = MINUS_EXPR;
5418 step = TREE_OPERAND (step, 0);
5421 /* Check whether we may leave the computation unchanged.
5422 This is the case only if it does not rely on other
5423 computations in the loop -- otherwise, the computation
5424 we rely upon may be removed in remove_unused_ivs,
5425 thus leading to an ICE. */
5426 old_code = gimple_assign_rhs_code (use->stmt);
5427 if (old_code == PLUS_EXPR
5428 || old_code == MINUS_EXPR
5429 || old_code == POINTER_PLUS_EXPR)
5431 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5432 op = gimple_assign_rhs2 (use->stmt);
5433 else if (old_code != MINUS_EXPR
5434 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5435 op = gimple_assign_rhs1 (use->stmt);
5436 else
5437 op = NULL_TREE;
5439 else
5440 op = NULL_TREE;
5442 if (op
5443 && (TREE_CODE (op) == INTEGER_CST
5444 || operand_equal_p (op, step, 0)))
5445 return;
5447 /* Otherwise, add the necessary computations to express
5448 the iv. */
5449 op = fold_convert (ctype, cand->var_before);
5450 comp = fold_convert (utype,
5451 build2 (incr_code, ctype, op,
5452 unshare_expr (step)));
5454 else
5456 comp = get_computation (data->current_loop, use, cand);
5457 gcc_assert (comp != NULL_TREE);
5460 switch (gimple_code (use->stmt))
5462 case GIMPLE_PHI:
5463 tgt = PHI_RESULT (use->stmt);
5465 /* If we should keep the biv, do not replace it. */
5466 if (name_info (data, tgt)->preserve_biv)
5467 return;
5469 bsi = gsi_after_labels (gimple_bb (use->stmt));
5470 break;
5472 case GIMPLE_ASSIGN:
5473 tgt = gimple_assign_lhs (use->stmt);
5474 bsi = gsi_for_stmt (use->stmt);
5475 break;
5477 default:
5478 gcc_unreachable ();
5481 op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5482 true, GSI_SAME_STMT);
5484 if (gimple_code (use->stmt) == GIMPLE_PHI)
5486 ass = gimple_build_assign (tgt, op);
5487 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5489 bsi = gsi_for_stmt (use->stmt);
5490 remove_phi_node (&bsi, false);
5492 else
5494 gimple_assign_set_rhs_from_tree (&bsi, op);
5495 use->stmt = gsi_stmt (bsi);
5499 /* Replaces ssa name in index IDX by its basic variable. Callback for
5500 for_each_index. */
5502 static bool
5503 idx_remove_ssa_names (tree base, tree *idx,
5504 void *data ATTRIBUTE_UNUSED)
5506 tree *op;
5508 if (TREE_CODE (*idx) == SSA_NAME)
5509 *idx = SSA_NAME_VAR (*idx);
5511 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
5513 op = &TREE_OPERAND (base, 2);
5514 if (*op
5515 && TREE_CODE (*op) == SSA_NAME)
5516 *op = SSA_NAME_VAR (*op);
5517 op = &TREE_OPERAND (base, 3);
5518 if (*op
5519 && TREE_CODE (*op) == SSA_NAME)
5520 *op = SSA_NAME_VAR (*op);
5523 return true;
5526 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5528 static tree
5529 unshare_and_remove_ssa_names (tree ref)
5531 ref = unshare_expr (ref);
5532 for_each_index (&ref, idx_remove_ssa_names, NULL);
5534 return ref;
5537 /* Copies the reference information from OLD_REF to NEW_REF. */
5539 static void
5540 copy_ref_info (tree new_ref, tree old_ref)
5542 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5543 copy_mem_ref_info (new_ref, old_ref);
5544 else
5546 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5547 TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
5548 TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
5552 /* Rewrites USE (address that is an iv) using candidate CAND. */
5554 static void
5555 rewrite_use_address (struct ivopts_data *data,
5556 struct iv_use *use, struct iv_cand *cand)
5558 aff_tree aff;
5559 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5560 tree base_hint = NULL_TREE;
5561 tree ref;
5562 bool ok;
5564 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5565 gcc_assert (ok);
5566 unshare_aff_combination (&aff);
5568 /* To avoid undefined overflow problems, all IV candidates use unsigned
5569 integer types. The drawback is that this makes it impossible for
5570 create_mem_ref to distinguish an IV that is based on a memory object
5571 from one that represents simply an offset.
5573 To work around this problem, we pass a hint to create_mem_ref that
5574 indicates which variable (if any) in aff is an IV based on a memory
5575 object. Note that we only consider the candidate. If this is not
5576 based on an object, the base of the reference is in some subexpression
5577 of the use -- but these will use pointer types, so they are recognized
5578 by the create_mem_ref heuristics anyway. */
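   /* Illustrative example (added; the details of create_mem_ref are an
      assumption here): for a candidate based on array 'a', BASE_HINT becomes
      the candidate's SSA name at USE->stmt, encouraging create_mem_ref to
      keep that pointer as the base of the emitted TARGET_MEM_REF instead of
      folding it into the offset computation.  */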
5579 if (cand->iv->base_object)
5580 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
5582 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, base_hint,
5583 data->speed);
5584 copy_ref_info (ref, *use->op_p);
5585 *use->op_p = ref;
5588 /* Rewrites USE (a condition in which one of the arguments is an iv) using
5589 candidate CAND. */
5591 static void
5592 rewrite_use_compare (struct ivopts_data *data,
5593 struct iv_use *use, struct iv_cand *cand)
5595 tree comp, *var_p, op, bound;
5596 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5597 enum tree_code compare;
5598 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5599 bool ok;
5601 bound = cp->value;
5602 if (bound)
5604 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5605 tree var_type = TREE_TYPE (var);
5606 gimple_seq stmts;
5608 compare = iv_elimination_compare (data, use);
5609 bound = unshare_expr (fold_convert (var_type, bound));
5610 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
5611 if (stmts)
5612 gsi_insert_seq_on_edge_immediate (
5613 loop_preheader_edge (data->current_loop),
5614 stmts);
5616 gimple_cond_set_lhs (use->stmt, var);
5617 gimple_cond_set_code (use->stmt, compare);
5618 gimple_cond_set_rhs (use->stmt, op);
5619 return;
5622 /* The induction variable elimination failed; just express the original
5623 giv. */
5624 comp = get_computation (data->current_loop, use, cand);
5625 gcc_assert (comp != NULL_TREE);
5627 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
5628 gcc_assert (ok);
5630 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5631 true, GSI_SAME_STMT);
5634 /* Rewrites USE using candidate CAND. */
5636 static void
5637 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5639 switch (use->type)
5641 case USE_NONLINEAR_EXPR:
5642 rewrite_use_nonlinear_expr (data, use, cand);
5643 break;
5645 case USE_ADDRESS:
5646 rewrite_use_address (data, use, cand);
5647 break;
5649 case USE_COMPARE:
5650 rewrite_use_compare (data, use, cand);
5651 break;
5653 default:
5654 gcc_unreachable ();
5657 update_stmt (use->stmt);
5660 /* Rewrite the uses using the selected induction variables. */
5662 static void
5663 rewrite_uses (struct ivopts_data *data)
5665 unsigned i;
5666 struct iv_cand *cand;
5667 struct iv_use *use;
5669 for (i = 0; i < n_iv_uses (data); i++)
5671 use = iv_use (data, i);
5672 cand = use->selected;
5673 gcc_assert (cand);
5675 rewrite_use (data, use, cand);
5679 /* Removes the ivs that are not used after rewriting. */
5681 static void
5682 remove_unused_ivs (struct ivopts_data *data)
5684 unsigned j;
5685 bitmap_iterator bi;
5686 bitmap toremove = BITMAP_ALLOC (NULL);
5688 /* Figure out an order in which to release SSA DEFs so that we don't
5689 release something that we'd have to propagate into a debug stmt
5690 afterwards. */
5691 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5693 struct version_info *info;
5695 info = ver_info (data, j);
5696 if (info->iv
5697 && !integer_zerop (info->iv->step)
5698 && !info->inv_id
5699 && !info->iv->have_use_for
5700 && !info->preserve_biv)
5701 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
5704 release_defs_bitset (toremove);
5706 BITMAP_FREE (toremove);
5709 /* Frees data allocated by the optimization of a single loop. */
5711 static void
5712 free_loop_data (struct ivopts_data *data)
5714 unsigned i, j;
5715 bitmap_iterator bi;
5716 tree obj;
5718 if (data->niters)
5720 pointer_map_destroy (data->niters);
5721 data->niters = NULL;
5724 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5726 struct version_info *info;
5728 info = ver_info (data, i);
5729 if (info->iv)
5730 free (info->iv);
5731 info->iv = NULL;
5732 info->has_nonlin_use = false;
5733 info->preserve_biv = false;
5734 info->inv_id = 0;
5736 bitmap_clear (data->relevant);
5737 bitmap_clear (data->important_candidates);
5739 for (i = 0; i < n_iv_uses (data); i++)
5741 struct iv_use *use = iv_use (data, i);
5743 free (use->iv);
5744 BITMAP_FREE (use->related_cands);
5745 for (j = 0; j < use->n_map_members; j++)
5746 if (use->cost_map[j].depends_on)
5747 BITMAP_FREE (use->cost_map[j].depends_on);
5748 free (use->cost_map);
5749 free (use);
5751 VEC_truncate (iv_use_p, data->iv_uses, 0);
5753 for (i = 0; i < n_iv_cands (data); i++)
5755 struct iv_cand *cand = iv_cand (data, i);
5757 if (cand->iv)
5758 free (cand->iv);
5759 if (cand->depends_on)
5760 BITMAP_FREE (cand->depends_on);
5761 free (cand);
5763 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5765 if (data->version_info_size < num_ssa_names)
5767 data->version_info_size = 2 * num_ssa_names;
5768 free (data->version_info);
5769 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5772 data->max_inv_id = 0;
5774 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5775 SET_DECL_RTL (obj, NULL_RTX);
5777 VEC_truncate (tree, decl_rtl_to_reset, 0);
5780 /* Finalizes data structures used by the iv optimization pass. */
5783 static void
5784 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5786 free_loop_data (data);
5787 free (data->version_info);
5788 BITMAP_FREE (data->relevant);
5789 BITMAP_FREE (data->important_candidates);
5791 VEC_free (tree, heap, decl_rtl_to_reset);
5792 VEC_free (iv_use_p, heap, data->iv_uses);
5793 VEC_free (iv_cand_p, heap, data->iv_candidates);
5796 /* Optimizes the LOOP. Returns true if anything changed. */
5798 static bool
5799 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5801 bool changed = false;
5802 struct iv_ca *iv_ca;
5803 edge exit;
5804 basic_block *body;
5806 gcc_assert (!data->niters);
5807 data->current_loop = loop;
5808 data->speed = optimize_loop_for_speed_p (loop);
5810 if (dump_file && (dump_flags & TDF_DETAILS))
5812 fprintf (dump_file, "Processing loop %d\n", loop->num);
5814 exit = single_dom_exit (loop);
5815 if (exit)
5817 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5818 exit->src->index, exit->dest->index);
5819 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
5820 fprintf (dump_file, "\n");
5823 fprintf (dump_file, "\n");
5826 body = get_loop_body (loop);
5827 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
5828 free (body);
5830 /* For each ssa name determines whether it behaves as an induction variable
5831 in some loop. */
5832 if (!find_induction_variables (data))
5833 goto finish;
5835 /* Finds interesting uses (item 1). */
5836 find_interesting_uses (data);
5837 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5838 goto finish;
5840 /* Finds candidates for the induction variables (item 2). */
5841 find_iv_candidates (data);
5843 /* Calculates the costs (item 3, part 1). */
5844 determine_iv_costs (data);
5845 determine_use_iv_costs (data);
5846 determine_set_costs (data);
5848 /* Find the optimal set of induction variables (item 3, part 2). */
5849 iv_ca = find_optimal_iv_set (data);
5850 if (!iv_ca)
5851 goto finish;
5852 changed = true;
5854 /* Create the new induction variables (item 4, part 1). */
5855 create_new_ivs (data, iv_ca);
5856 iv_ca_free (&iv_ca);
5858 /* Rewrite the uses (item 4, part 2). */
5859 rewrite_uses (data);
5861 /* Remove the ivs that are unused after rewriting. */
5862 remove_unused_ivs (data);
5864 /* We have changed the structure of induction variables; it might happen
5865 that definitions in the scev database refer to some of them that were
5866 eliminated. */
5867 scev_reset ();
5869 finish:
5870 free_loop_data (data);
5872 return changed;
5875 /* Main entry point. Optimizes induction variables in loops. */
5877 void
5878 tree_ssa_iv_optimize (void)
5880 struct loop *loop;
5881 struct ivopts_data data;
5882 loop_iterator li;
5884 tree_ssa_iv_optimize_init (&data);
5886 /* Optimize the loops starting with the innermost ones. */
5887 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5889 if (dump_file && (dump_flags & TDF_DETAILS))
5890 flow_loop_dump (loop, dump_file, NULL, 1);
5892 tree_ssa_iv_optimize_loop (&data, loop);
5895 tree_ssa_iv_optimize_finalize (&data);