/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
	 induction variable in the set and adds its cost to the sum.  The cost
	 reflects the time spent on modifying the induction variable's value
	 to be usable for the given purpose (adding base and offset for
	 arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
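
/* For illustration only (not taken from the code below): given a counted
   loop such as

       for (i = 0; i < n; i++)
	 a[i] = 0;

   the pass would typically strength-reduce the address computation to
   something like

       for (p = a; p != a + n; p++)
	 *p = 0;

   replacing the "base + i * size" address by a single pointer induction
   variable and eliminating the counter when it has no other uses.  The
   exact outcome depends on the target's addressing modes and costs.  */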
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"
/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction
			   variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
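
/* For illustration: costs accumulate componentwise, so a use costing
   {4, 1} expressed via a candidate whose increment costs {2, 0} would
   contribute {6, 1} to the total, while any component reaching INFTY makes
   an assignment effectively unusable.  The helpers that combine and compare
   comp_cost values live later in the file, outside this excerpt.  */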
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
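
/* Illustrative summary of the positions: IP_NORMAL places the increment
   immediately before the exit test at the end of the loop body, IP_END
   places it on the latch after the test, IP_BEFORE_USE/IP_AFTER_USE bracket
   one particular use (useful for auto-increment addressing), and
   IP_ORIGINAL keeps the increment where the original biv already had it.  */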
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopts.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopts.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
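
/* All three bounds come from --param knobs and can be tuned on the command
   line; assuming the standard spellings of these params, e.g.

       gcc -O2 --param iv-consider-all-candidates-bound=40 \
	       --param iv-max-considered-uses=250 ...

   (illustrative values; the defaults are defined in params.def).  */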
/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *)ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *)ent2;

  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *)ent;
  return expr->hash;
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
				    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
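
/* For example (illustrative): for a pointer expression such as
   &a[0] + 4 * i this returns &a; for an INTEGER_CST "pointer" it returns
   NULL_TREE; and for a pointer it cannot analyze further it conservatively
   returns the expression itself converted to ptr_type_node.  */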
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
	return false;
    }

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */
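
/* For example (illustrative): with TOP = 4 * i and BOT = i this stores 4;
   with TOP = i + i and BOT = i it stores 2; with TOP = 12 and BOT = 4 it
   stores 3.  A non-constant quotient, e.g. TOP = i * j with BOT = i,
   makes it return false.  */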
static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
			      precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
	return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
			      precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
	find_interesting_uses_op (data, def);
    }
}
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
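
/* For example (illustrative): for an array of 4-byte elements,
   stripping &a[i + 1] yields &a[i] with *OFFSET == 4, and for a
   char * pointer P, stripping P + 16 yields P with *OFFSET == 16.  */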
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
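
/* E.g. (illustrative) this maps int to unsigned int, and a pointer type to
   an unsigned integer type of the same precision, so candidate arithmetic
   wraps instead of invoking undefined signed overflow.  */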
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
2199 /* Adds a candidate BASE + STEP * i. The candidate's important field is set
2200 to IMPORTANT and its position to POS. If USE is not NULL, the candidate is
2201 set as related to it. If both BASE and STEP are NULL, we add a pseudocandidate
2202 for the replacement of the final value of the iv by a direct computation. */
2204 static struct iv_cand *
2205 add_candidate_1 (struct ivopts_data *data,
2206 tree base, tree step, bool important, enum iv_position pos,
2207 struct iv_use *use, gimple incremented_at)
2209 unsigned i;
2210 struct iv_cand *cand = NULL;
2211 tree type, orig_type;
2213 /* For non-original variables, make sure their values are computed in a type
2214 that does not invoke undefined behavior on overflows (since in general,
2215 we cannot prove that these induction variables are non-wrapping). */
2216 if (pos != IP_ORIGINAL)
2218 orig_type = TREE_TYPE (base);
2219 type = generic_type_for (orig_type);
2220 if (type != orig_type)
2222 base = fold_convert (type, base);
2223 step = fold_convert (type, step);
2227 for (i = 0; i < n_iv_cands (data); i++)
2229 cand = iv_cand (data, i);
2231 if (cand->pos != pos)
2232 continue;
2234 if (cand->incremented_at != incremented_at
2235 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2236 && cand->ainc_use != use))
2237 continue;
2239 if (!cand->iv)
2241 if (!base && !step)
2242 break;
2244 continue;
2247 if (!base && !step)
2248 continue;
2250 if (operand_equal_p (base, cand->iv->base, 0)
2251 && operand_equal_p (step, cand->iv->step, 0)
2252 && (TYPE_PRECISION (TREE_TYPE (base))
2253 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2254 break;
2257 if (i == n_iv_cands (data))
2259 cand = XCNEW (struct iv_cand);
2260 cand->id = i;
2262 if (!base && !step)
2263 cand->iv = NULL;
2264 else
2265 cand->iv = alloc_iv (base, step);
2267 cand->pos = pos;
2268 if (pos != IP_ORIGINAL && cand->iv)
2270 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2271 cand->var_after = cand->var_before;
2273 cand->important = important;
2274 cand->incremented_at = incremented_at;
2275 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2277 if (step
2278 && TREE_CODE (step) != INTEGER_CST)
2280 fd_ivopts_data = data;
2281 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2284 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2285 cand->ainc_use = use;
2286 else
2287 cand->ainc_use = NULL;
2289 if (dump_file && (dump_flags & TDF_DETAILS))
2290 dump_cand (dump_file, cand);
2293 if (important && !cand->important)
2295 cand->important = true;
2296 if (dump_file && (dump_flags & TDF_DETAILS))
2297 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2300 if (use)
2302 bitmap_set_bit (use->related_cands, i);
2303 if (dump_file && (dump_flags & TDF_DETAILS))
2304 fprintf (dump_file, "Candidate %d is related to use %d\n",
2305 cand->id, use->id);
2308 return cand;
2311 /* Returns true if incrementing the induction variable at the end of LOOP
2312 is allowed.
2314 The purpose is to avoid splitting the latch edge with a biv increment, thus
2315 creating a jump, possibly confusing other optimization passes and leaving
2316 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2317 is not available (so we do not have a better alternative), or if the latch
2318 edge is already nonempty. */
2320 static bool
2321 allow_ip_end_pos_p (struct loop *loop)
2323 if (!ip_normal_pos (loop))
2324 return true;
2326 if (!empty_block_p (ip_end_pos (loop)))
2327 return true;
2329 return false;
2332 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2333 The candidates' important field is set to IMPORTANT. */
2335 static void
2336 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2337 bool important, struct iv_use *use)
2339 basic_block use_bb = gimple_bb (use->stmt);
2340 enum machine_mode mem_mode;
2341 unsigned HOST_WIDE_INT cstepi;
2343 /* If we insert the increment in any position other than the standard
2344 ones, we must ensure that it is incremented once per iteration.
2345 It must not be in an inner nested loop, or one side of an if
2346 statement. */
2347 if (use_bb->loop_father != data->current_loop
2348 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2349 || stmt_could_throw_p (use->stmt)
2350 || !cst_and_fits_in_hwi (step))
2351 return;
2353 cstepi = int_cst_value (step);
2355 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2356 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2357 || USE_STORE_PRE_INCREMENT (mem_mode))
2358 && GET_MODE_SIZE (mem_mode) == cstepi)
2359 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2360 || USE_STORE_PRE_DECREMENT (mem_mode))
2361 && GET_MODE_SIZE (mem_mode) == -cstepi))
2363 enum tree_code code = MINUS_EXPR;
2364 tree new_base;
2365 tree new_step = step;
2367 if (POINTER_TYPE_P (TREE_TYPE (base)))
2369 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2370 code = POINTER_PLUS_EXPR;
2372 else
2373 new_step = fold_convert (TREE_TYPE (base), new_step);
2374 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2375 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2376 use->stmt);
2378 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2379 || USE_STORE_POST_INCREMENT (mem_mode))
2380 && GET_MODE_SIZE (mem_mode) == cstepi)
2381 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2382 || USE_STORE_POST_DECREMENT (mem_mode))
2383 && GET_MODE_SIZE (mem_mode) == -cstepi))
2385 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2386 use->stmt);
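/* For example, on a target with a post-increment addressing mode (say an
   ARM-style load such as "ldr r0, [r1], #4"), a use of *p with step 4 in
   SImode yields an IP_AFTER_USE candidate, so the address computation and
   the iv increment can be combined into a single instruction. The example
   is illustrative; which modes apply depends on the target macros tested
   above.  */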
2390 /* Adds a candidate BASE + STEP * i. The candidate's important field is set
2391 to IMPORTANT. If USE is not NULL, the candidate is set as related to
2392 it. The candidate computation is scheduled on all available positions. */
2394 static void
2395 add_candidate (struct ivopts_data *data,
2396 tree base, tree step, bool important, struct iv_use *use)
2398 if (ip_normal_pos (data->current_loop))
2399 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2400 if (ip_end_pos (data->current_loop)
2401 && allow_ip_end_pos_p (data->current_loop))
2402 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2404 if (use != NULL && use->type == USE_ADDRESS)
2405 add_autoinc_candidates (data, base, step, important, use);
2408 /* Adds standard iv candidates. */
2410 static void
2411 add_standard_iv_candidates (struct ivopts_data *data)
2413 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2415 /* The same for the long type if it is wider than int and still fast enough. */
2416 if (TYPE_PRECISION
2417 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2418 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2419 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2420 build_int_cst (long_integer_type_node, 1), true, NULL);
2422 /* And for the long long type if it is wider still and fast enough. */
2423 if (TYPE_PRECISION
2424 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2425 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2426 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2427 build_int_cst (long_long_integer_type_node, 1), true, NULL);
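/* E.g., on an LP64 target this typically adds the candidates 0 + 1 * i
   both in int and in long (64-bit long still fits in a word there), giving
   the cost model generic counters of both widths to choose from.  */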
2431 /* Adds candidates based on the old induction variable IV. */
2433 static void
2434 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2436 gimple phi;
2437 tree def;
2438 struct iv_cand *cand;
2440 add_candidate (data, iv->base, iv->step, true, NULL);
2442 /* The same, but with initial value zero. */
2443 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2444 add_candidate (data, size_int (0), iv->step, true, NULL);
2445 else
2446 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2447 iv->step, true, NULL);
2449 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2450 if (gimple_code (phi) == GIMPLE_PHI)
2452 /* Additionally record the possibility of leaving the original iv
2453 untouched. */
2454 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2455 cand = add_candidate_1 (data,
2456 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2457 SSA_NAME_DEF_STMT (def));
2458 cand->var_before = iv->ssa_name;
2459 cand->var_after = def;
2463 /* Adds candidates based on the old induction variables. */
2465 static void
2466 add_old_ivs_candidates (struct ivopts_data *data)
2468 unsigned i;
2469 struct iv *iv;
2470 bitmap_iterator bi;
2472 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2474 iv = ver_info (data, i)->iv;
2475 if (iv && iv->biv_p && !integer_zerop (iv->step))
2476 add_old_iv_candidates (data, iv);
2480 /* Adds candidates based on the value of the induction variable IV and USE. */
2482 static void
2483 add_iv_value_candidates (struct ivopts_data *data,
2484 struct iv *iv, struct iv_use *use)
2486 unsigned HOST_WIDE_INT offset;
2487 tree base;
2488 tree basetype;
2490 add_candidate (data, iv->base, iv->step, false, use);
2492 /* The same, but with initial value zero. Make such a variable important,
2493 since it is generic enough that possibly many uses may be based
2494 on it. */
2495 basetype = TREE_TYPE (iv->base);
2496 if (POINTER_TYPE_P (basetype))
2497 basetype = sizetype;
2498 add_candidate (data, build_int_cst (basetype, 0),
2499 iv->step, true, use);
2501 /* Third, try removing the constant offset. Make sure to even
2502 add a candidate for &a[0] vs. (T *)&a. */
2503 base = strip_offset (iv->base, &offset);
2504 if (offset
2505 || base != iv->base)
2506 add_candidate (data, base, iv->step, false, use);
2509 /* Adds candidates based on the uses. */
2511 static void
2512 add_derived_ivs_candidates (struct ivopts_data *data)
2514 unsigned i;
2516 for (i = 0; i < n_iv_uses (data); i++)
2518 struct iv_use *use = iv_use (data, i);
2520 if (!use)
2521 continue;
2523 switch (use->type)
2525 case USE_NONLINEAR_EXPR:
2526 case USE_COMPARE:
2527 case USE_ADDRESS:
2528 /* Just add the ivs based on the value of the iv used here. */
2529 add_iv_value_candidates (data, use->iv, use);
2530 break;
2532 default:
2533 gcc_unreachable ();
2538 /* Record important candidates and add them to related_cands bitmaps
2539 if needed. */
2541 static void
2542 record_important_candidates (struct ivopts_data *data)
2544 unsigned i;
2545 struct iv_use *use;
2547 for (i = 0; i < n_iv_cands (data); i++)
2549 struct iv_cand *cand = iv_cand (data, i);
2551 if (cand->important)
2552 bitmap_set_bit (data->important_candidates, i);
2555 data->consider_all_candidates = (n_iv_cands (data)
2556 <= CONSIDER_ALL_CANDIDATES_BOUND);
2558 if (data->consider_all_candidates)
2560 /* We will not need "related_cands" bitmaps in this case,
2561 so release them to decrease peak memory consumption. */
2562 for (i = 0; i < n_iv_uses (data); i++)
2564 use = iv_use (data, i);
2565 BITMAP_FREE (use->related_cands);
2568 else
2570 /* Add important candidates to the related_cands bitmaps. */
2571 for (i = 0; i < n_iv_uses (data); i++)
2572 bitmap_ior_into (iv_use (data, i)->related_cands,
2573 data->important_candidates);
2577 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2578 If consider_all_candidates is true, we use a two-dimensional array; otherwise
2579 we allocate a small hash table for every use. */
2581 static void
2582 alloc_use_cost_map (struct ivopts_data *data)
2584 unsigned i, size, s, j;
2586 for (i = 0; i < n_iv_uses (data); i++)
2588 struct iv_use *use = iv_use (data, i);
2589 bitmap_iterator bi;
2591 if (data->consider_all_candidates)
2592 size = n_iv_cands (data);
2593 else
2595 s = 0;
2596 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2598 s++;
2601 /* Round up to the nearest power of two, so that taking the modulo by it is fast. */
2602 for (size = 1; size < s; size <<= 1)
2603 continue;
2606 use->n_map_members = size;
2607 use->cost_map = XCNEWVEC (struct cost_pair, size);
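/* E.g., a use with 5 related candidates gets a table of size 8, so the
   lookup code can index it with "cand->id & 7" instead of a division.  */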
2611 /* Returns a description of the computation cost of an expression whose
2612 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2614 static comp_cost
2615 new_cost (unsigned runtime, unsigned complexity)
2617 comp_cost cost;
2619 cost.cost = runtime;
2620 cost.complexity = complexity;
2622 return cost;
2625 /* Adds costs COST1 and COST2. */
2627 static comp_cost
2628 add_costs (comp_cost cost1, comp_cost cost2)
2630 cost1.cost += cost2.cost;
2631 cost1.complexity += cost2.complexity;
2633 return cost1;
2635 /* Subtracts cost COST2 from COST1. */
2637 static comp_cost
2638 sub_costs (comp_cost cost1, comp_cost cost2)
2640 cost1.cost -= cost2.cost;
2641 cost1.complexity -= cost2.complexity;
2643 return cost1;
2646 /* Returns a negative number if COST1 < COST2, a positive number if
2647 COST1 > COST2, and 0 if COST1 = COST2. */
2649 static int
2650 compare_costs (comp_cost cost1, comp_cost cost2)
2652 if (cost1.cost == cost2.cost)
2653 return cost1.complexity - cost2.complexity;
2655 return cost1.cost - cost2.cost;
2658 /* Returns true if COST is infinite. */
2660 static bool
2661 infinite_cost_p (comp_cost cost)
2663 return cost.cost == INFTY;
2666 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2667 depends on invariants DEPENDS_ON, that the value used in expressing it
2668 is VALUE, and, in case of iv elimination, that the comparison operator is COMP. */
2670 static void
2671 set_use_iv_cost (struct ivopts_data *data,
2672 struct iv_use *use, struct iv_cand *cand,
2673 comp_cost cost, bitmap depends_on, tree value,
2674 enum tree_code comp, int inv_expr_id)
2676 unsigned i, s;
2678 if (infinite_cost_p (cost))
2680 BITMAP_FREE (depends_on);
2681 return;
2684 if (data->consider_all_candidates)
2686 use->cost_map[cand->id].cand = cand;
2687 use->cost_map[cand->id].cost = cost;
2688 use->cost_map[cand->id].depends_on = depends_on;
2689 use->cost_map[cand->id].value = value;
2690 use->cost_map[cand->id].comp = comp;
2691 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2692 return;
2695 /* n_map_members is a power of two, so this computes modulo. */
2696 s = cand->id & (use->n_map_members - 1);
2697 for (i = s; i < use->n_map_members; i++)
2698 if (!use->cost_map[i].cand)
2699 goto found;
2700 for (i = 0; i < s; i++)
2701 if (!use->cost_map[i].cand)
2702 goto found;
2704 gcc_unreachable ();
2706 found:
2707 use->cost_map[i].cand = cand;
2708 use->cost_map[i].cost = cost;
2709 use->cost_map[i].depends_on = depends_on;
2710 use->cost_map[i].value = value;
2711 use->cost_map[i].comp = comp;
2712 use->cost_map[i].inv_expr_id = inv_expr_id;
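/* E.g., with n_map_members == 8 and cand->id == 13, probing starts at
   slot 13 & 7 == 5 and scans forward, wrapping around to slots 0..4 if
   needed -- simple open addressing with linear probing.  */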
2715 /* Gets cost of (USE, CANDIDATE) pair. */
2717 static struct cost_pair *
2718 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2719 struct iv_cand *cand)
2721 unsigned i, s;
2722 struct cost_pair *ret;
2724 if (!cand)
2725 return NULL;
2727 if (data->consider_all_candidates)
2729 ret = use->cost_map + cand->id;
2730 if (!ret->cand)
2731 return NULL;
2733 return ret;
2736 /* n_map_members is a power of two, so this computes modulo. */
2737 s = cand->id & (use->n_map_members - 1);
2738 for (i = s; i < use->n_map_members; i++)
2739 if (use->cost_map[i].cand == cand)
2740 return use->cost_map + i;
2742 for (i = 0; i < s; i++)
2743 if (use->cost_map[i].cand == cand)
2744 return use->cost_map + i;
2746 return NULL;
2749 /* Returns an estimate of the cost of computing the insn sequence SEQ. */
2751 static unsigned
2752 seq_cost (rtx seq, bool speed)
2754 unsigned cost = 0;
2755 rtx set;
2757 for (; seq; seq = NEXT_INSN (seq))
2759 set = single_set (seq);
2760 if (set)
2761 cost += set_src_cost (SET_SRC (set), speed);
2762 else
2763 cost++;
2766 return cost;
2769 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory. */
2770 static rtx
2771 produce_memory_decl_rtl (tree obj, int *regno)
2773 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2774 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2775 rtx x;
2777 gcc_assert (obj);
2778 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2780 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2781 x = gen_rtx_SYMBOL_REF (address_mode, name);
2782 SET_SYMBOL_REF_DECL (x, obj);
2783 x = gen_rtx_MEM (DECL_MODE (obj), x);
2784 set_mem_addr_space (x, as);
2785 targetm.encode_section_info (obj, x, true);
2787 else
2789 x = gen_raw_REG (address_mode, (*regno)++);
2790 x = gen_rtx_MEM (DECL_MODE (obj), x);
2791 set_mem_addr_space (x, as);
2794 return x;
2797 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2798 walk_tree. DATA contains the actual fake register number. */
2800 static tree
2801 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2803 tree obj = NULL_TREE;
2804 rtx x = NULL_RTX;
2805 int *regno = (int *) data;
2807 switch (TREE_CODE (*expr_p))
2809 case ADDR_EXPR:
2810 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2811 handled_component_p (*expr_p);
2812 expr_p = &TREE_OPERAND (*expr_p, 0))
2813 continue;
2814 obj = *expr_p;
2815 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2816 x = produce_memory_decl_rtl (obj, regno);
2817 break;
2819 case SSA_NAME:
2820 *ws = 0;
2821 obj = SSA_NAME_VAR (*expr_p);
2822 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2823 if (!obj)
2824 return NULL_TREE;
2825 if (!DECL_RTL_SET_P (obj))
2826 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2827 break;
2829 case VAR_DECL:
2830 case PARM_DECL:
2831 case RESULT_DECL:
2832 *ws = 0;
2833 obj = *expr_p;
2835 if (DECL_RTL_SET_P (obj))
2836 break;
2838 if (DECL_MODE (obj) == BLKmode)
2839 x = produce_memory_decl_rtl (obj, regno);
2840 else
2841 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2843 break;
2845 default:
2846 break;
2849 if (x)
2851 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2852 SET_DECL_RTL (obj, x);
2855 return NULL_TREE;
2858 /* Determines cost of the computation of EXPR. */
2860 static unsigned
2861 computation_cost (tree expr, bool speed)
2863 rtx seq, rslt;
2864 tree type = TREE_TYPE (expr);
2865 unsigned cost;
2866 /* Avoid using hard regs in ways which may be unsupported. */
2867 int regno = LAST_VIRTUAL_REGISTER + 1;
2868 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2869 enum node_frequency real_frequency = node->frequency;
2871 node->frequency = NODE_FREQUENCY_NORMAL;
2872 crtl->maybe_hot_insn_p = speed;
2873 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2874 start_sequence ();
2875 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2876 seq = get_insns ();
2877 end_sequence ();
2878 default_rtl_profile ();
2879 node->frequency = real_frequency;
2881 cost = seq_cost (seq, speed);
2882 if (MEM_P (rslt))
2883 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2884 TYPE_ADDR_SPACE (type), speed);
2885 else if (!REG_P (rslt))
2886 cost += set_src_cost (rslt, speed);
2888 return cost;
2891 /* Returns variable containing the value of candidate CAND at statement AT. */
2893 static tree
2894 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2896 if (stmt_after_increment (loop, cand, stmt))
2897 return cand->var_after;
2898 else
2899 return cand->var_before;
2902 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2903 same precision, which is at least as wide as the precision of TYPE, stores
2904 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2905 type of A and B. */
2907 static tree
2908 determine_common_wider_type (tree *a, tree *b)
2910 tree wider_type = NULL;
2911 tree suba, subb;
2912 tree atype = TREE_TYPE (*a);
2914 if (CONVERT_EXPR_P (*a))
2916 suba = TREE_OPERAND (*a, 0);
2917 wider_type = TREE_TYPE (suba);
2918 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2919 return atype;
2921 else
2922 return atype;
2924 if (CONVERT_EXPR_P (*b))
2926 subb = TREE_OPERAND (*b, 0);
2927 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2928 return atype;
2930 else
2931 return atype;
2933 *a = suba;
2934 *b = subb;
2935 return wider_type;
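/* E.g., if *a is (unsigned int) la and *b is (unsigned int) lb, with la
   and lb of type long, this strips both conversions and returns long, so
   the difference la - lb can be folded in the wider type before being
   cast back down.  */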
2938 /* Determines the expression by which USE is expressed from induction variable
2939 CAND at statement AT in LOOP. The expression is stored in a decomposed
2940 form into AFF. Returns false if USE cannot be expressed using CAND. */
2942 static bool
2943 get_computation_aff (struct loop *loop,
2944 struct iv_use *use, struct iv_cand *cand, gimple at,
2945 struct affine_tree_combination *aff)
2947 tree ubase = use->iv->base;
2948 tree ustep = use->iv->step;
2949 tree cbase = cand->iv->base;
2950 tree cstep = cand->iv->step, cstep_common;
2951 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2952 tree common_type, var;
2953 tree uutype;
2954 aff_tree cbase_aff, var_aff;
2955 double_int rat;
2957 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2959 /* We do not have enough precision to express the values of USE. */
2960 return false;
2963 var = var_at_stmt (loop, cand, at);
2964 uutype = unsigned_type_for (utype);
2966 /* If the conversion is not noop, perform it. */
2967 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2969 cstep = fold_convert (uutype, cstep);
2970 cbase = fold_convert (uutype, cbase);
2971 var = fold_convert (uutype, var);
2974 if (!constant_multiple_of (ustep, cstep, &rat))
2975 return false;
2977 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2978 type, we achieve better folding by computing their difference in this
2979 wider type, and casting the result to UUTYPE. We do not need to worry about
2980 overflows, as all the arithmetic will in the end be performed in UUTYPE
2981 anyway. */
2982 common_type = determine_common_wider_type (&ubase, &cbase);
2984 /* use = ubase - ratio * cbase + ratio * var. */
2985 tree_to_aff_combination (ubase, common_type, aff);
2986 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2987 tree_to_aff_combination (var, uutype, &var_aff);
2989 /* We need to shift the value if we are after the increment. */
2990 if (stmt_after_increment (loop, cand, at))
2992 aff_tree cstep_aff;
2994 if (common_type != uutype)
2995 cstep_common = fold_convert (common_type, cstep);
2996 else
2997 cstep_common = cstep;
2999 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3000 aff_combination_add (&cbase_aff, &cstep_aff);
3003 aff_combination_scale (&cbase_aff, double_int_neg (rat));
3004 aff_combination_add (aff, &cbase_aff);
3005 if (common_type != uutype)
3006 aff_combination_convert (aff, uutype);
3008 aff_combination_scale (&var_aff, rat);
3009 aff_combination_add (aff, &var_aff);
3011 return true;
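/* A small example of the rewrite above: for a use with base 0 and step 4
   based on a candidate with base 0 and step 1, we get rat == 4 and the
   affine combination 0 - 4 * 0 + 4 * var, i.e. simply 4 * var.  */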
3014 /* Determines the expression by which USE is expressed from induction variable
3015 CAND at statement AT in LOOP. The computation is unshared. */
3017 static tree
3018 get_computation_at (struct loop *loop,
3019 struct iv_use *use, struct iv_cand *cand, gimple at)
3021 aff_tree aff;
3022 tree type = TREE_TYPE (use->iv->base);
3024 if (!get_computation_aff (loop, use, cand, at, &aff))
3025 return NULL_TREE;
3026 unshare_aff_combination (&aff);
3027 return fold_convert (type, aff_combination_to_tree (&aff));
3030 /* Determines the expression by which USE is expressed from induction variable
3031 CAND in LOOP. The computation is unshared. */
3033 static tree
3034 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3036 return get_computation_at (loop, use, cand, use->stmt);
3039 /* Adjust the cost COST for being in loop setup rather than loop body.
3040 If we're optimizing for space, the loop setup overhead is constant;
3041 if we're optimizing for speed, amortize it over the per-iteration cost. */
3042 static unsigned
3043 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3045 if (cost == INFTY)
3046 return cost;
3047 else if (optimize_loop_for_speed_p (data->current_loop))
3048 return cost / avg_loop_niter (data->current_loop);
3049 else
3050 return cost;
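/* E.g., a setup cost of 10 in a loop with an average of 5 iterations
   contributes only 2 per iteration when optimizing for speed, but the
   full 10 when optimizing for size.  */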
3053 /* Returns true if multiplying by RATIO is allowed in an address. Tests the
3054 validity for a memory reference accessing memory of mode MODE in
3055 address space AS. */
3057 DEF_VEC_P (sbitmap);
3058 DEF_VEC_ALLOC_P (sbitmap, heap);
3060 bool
3061 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3062 addr_space_t as)
3064 #define MAX_RATIO 128
3065 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3066 static VEC (sbitmap, heap) *valid_mult_list;
3067 sbitmap valid_mult;
3069 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3070 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3072 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3073 if (!valid_mult)
3075 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3076 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3077 rtx addr;
3078 HOST_WIDE_INT i;
3080 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3081 sbitmap_zero (valid_mult);
3082 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3083 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3085 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3086 if (memory_address_addr_space_p (mode, addr, as))
3087 SET_BIT (valid_mult, i + MAX_RATIO);
3090 if (dump_file && (dump_flags & TDF_DETAILS))
3092 fprintf (dump_file, " allowed multipliers:");
3093 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3094 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3095 fprintf (dump_file, " %d", (int) i);
3096 fprintf (dump_file, "\n");
3097 fprintf (dump_file, "\n");
3100 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3103 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3104 return false;
3106 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
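/* E.g., on x86 the valid multipliers in an address are typically the index
   scales 1, 2, 4 and 8; other ratios in [-MAX_RATIO, MAX_RATIO] fail the
   memory_address_addr_space_p test above and are rejected here.  */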
3109 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3110 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3111 variable is omitted. Compute the cost for a memory reference that accesses
3112 a memory location of mode MEM_MODE in address space AS.
3114 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3115 size of MEM_MODE / RATIO) is available. To make this determination, we
3116 look at the size of the increment to be made, which is given in CSTEP.
3117 CSTEP may be zero if the step is unknown.
3118 STMT_AFTER_INC is true iff the statement we're looking at is after the
3119 increment of the original biv.
3121 TODO -- there must be some better way. This all is quite crude. */
3123 typedef struct address_cost_data_s
3125 HOST_WIDE_INT min_offset, max_offset;
3126 unsigned costs[2][2][2][2];
3127 } *address_cost_data;
3129 DEF_VEC_P (address_cost_data);
3130 DEF_VEC_ALLOC_P (address_cost_data, heap);
3132 static comp_cost
3133 get_address_cost (bool symbol_present, bool var_present,
3134 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3135 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3136 addr_space_t as, bool speed,
3137 bool stmt_after_inc, bool *may_autoinc)
3139 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3140 static VEC(address_cost_data, heap) *address_cost_data_list;
3141 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3142 address_cost_data data;
3143 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3144 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3145 unsigned cost, acost, complexity;
3146 bool offset_p, ratio_p, autoinc;
3147 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3148 unsigned HOST_WIDE_INT mask;
3149 unsigned bits;
3151 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3152 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3153 data_index + 1);
3155 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3156 if (!data)
3158 HOST_WIDE_INT i;
3159 HOST_WIDE_INT rat, off = 0;
3160 int old_cse_not_expected, width;
3161 unsigned sym_p, var_p, off_p, rat_p, add_c;
3162 rtx seq, addr, base;
3163 rtx reg0, reg1;
3165 data = (address_cost_data) xcalloc (1, sizeof (*data));
3167 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3169 width = GET_MODE_BITSIZE (address_mode) - 1;
3170 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3171 width = HOST_BITS_PER_WIDE_INT - 1;
3172 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3174 for (i = width; i >= 0; i--)
3176 off = -((unsigned HOST_WIDE_INT) 1 << i);
3177 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3178 if (memory_address_addr_space_p (mem_mode, addr, as))
3179 break;
3181 data->min_offset = (i == -1 ? 0 : off);
3183 for (i = width; i >= 0; i--)
3185 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3186 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3187 if (memory_address_addr_space_p (mem_mode, addr, as))
3188 break;
3190 if (i == -1)
3191 off = 0;
3192 data->max_offset = off;
3194 if (dump_file && (dump_flags & TDF_DETAILS))
3196 fprintf (dump_file, "get_address_cost:\n");
3197 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3198 GET_MODE_NAME (mem_mode),
3199 data->min_offset);
3200 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3201 GET_MODE_NAME (mem_mode),
3202 data->max_offset);
3205 rat = 1;
3206 for (i = 2; i <= MAX_RATIO; i++)
3207 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3209 rat = i;
3210 break;
3213 /* Compute the cost of various addressing modes. */
3214 acost = 0;
3215 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3216 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3218 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3219 || USE_STORE_PRE_DECREMENT (mem_mode))
3221 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3222 has_predec[mem_mode]
3223 = memory_address_addr_space_p (mem_mode, addr, as);
3225 if (USE_LOAD_POST_DECREMENT (mem_mode)
3226 || USE_STORE_POST_DECREMENT (mem_mode))
3228 addr = gen_rtx_POST_DEC (address_mode, reg0);
3229 has_postdec[mem_mode]
3230 = memory_address_addr_space_p (mem_mode, addr, as);
3232 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3233 || USE_STORE_PRE_INCREMENT (mem_mode))
3235 addr = gen_rtx_PRE_INC (address_mode, reg0);
3236 has_preinc[mem_mode]
3237 = memory_address_addr_space_p (mem_mode, addr, as);
3239 if (USE_LOAD_POST_INCREMENT (mem_mode)
3240 || USE_STORE_POST_INCREMENT (mem_mode))
3242 addr = gen_rtx_POST_INC (address_mode, reg0);
3243 has_postinc[mem_mode]
3244 = memory_address_addr_space_p (mem_mode, addr, as);
3246 for (i = 0; i < 16; i++)
3248 sym_p = i & 1;
3249 var_p = (i >> 1) & 1;
3250 off_p = (i >> 2) & 1;
3251 rat_p = (i >> 3) & 1;
3253 addr = reg0;
3254 if (rat_p)
3255 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3256 gen_int_mode (rat, address_mode));
3258 if (var_p)
3259 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3261 if (sym_p)
3263 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3264 /* ??? We can run into trouble with some backends by presenting
3265 it with symbols which haven't been properly passed through
3266 targetm.encode_section_info. By setting the local bit, we
3267 enhance the probability of things working. */
3268 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3270 if (off_p)
3271 base = gen_rtx_fmt_e (CONST, address_mode,
3272 gen_rtx_fmt_ee
3273 (PLUS, address_mode, base,
3274 gen_int_mode (off, address_mode)));
3276 else if (off_p)
3277 base = gen_int_mode (off, address_mode);
3278 else
3279 base = NULL_RTX;
3281 if (base)
3282 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3284 start_sequence ();
3285 /* To avoid splitting addressing modes, pretend that no cse will
3286 follow. */
3287 old_cse_not_expected = cse_not_expected;
3288 cse_not_expected = true;
3289 addr = memory_address_addr_space (mem_mode, addr, as);
3290 cse_not_expected = old_cse_not_expected;
3291 seq = get_insns ();
3292 end_sequence ();
3294 acost = seq_cost (seq, speed);
3295 acost += address_cost (addr, mem_mode, as, speed);
3297 if (!acost)
3298 acost = 1;
3299 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3302 /* On some targets, it is quite expensive to load a symbol into a register,
3303 which makes addresses that contain symbols look much more expensive.
3304 However, the symbol will have to be loaded in any case before the
3305 loop (and quite likely we have it in a register already), so it does not
3306 make much sense to penalize them too heavily. So make some final
3307 tweaks for the SYMBOL_PRESENT modes:
3309 If VAR_PRESENT is false, and the mode obtained by changing the symbol to
3310 a var is cheaper, use that mode with a small penalty.
3311 If VAR_PRESENT is true, test whether the mode with
3312 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
3313 if this is the case, use it. */
3314 add_c = add_cost (speed, address_mode);
3315 for (i = 0; i < 8; i++)
3317 var_p = i & 1;
3318 off_p = (i >> 1) & 1;
3319 rat_p = (i >> 2) & 1;
3321 acost = data->costs[0][1][off_p][rat_p] + 1;
3322 if (var_p)
3323 acost += add_c;
3325 if (acost < data->costs[1][var_p][off_p][rat_p])
3326 data->costs[1][var_p][off_p][rat_p] = acost;
3329 if (dump_file && (dump_flags & TDF_DETAILS))
3331 fprintf (dump_file, "Address costs:\n");
3333 for (i = 0; i < 16; i++)
3335 sym_p = i & 1;
3336 var_p = (i >> 1) & 1;
3337 off_p = (i >> 2) & 1;
3338 rat_p = (i >> 3) & 1;
3340 fprintf (dump_file, " ");
3341 if (sym_p)
3342 fprintf (dump_file, "sym + ");
3343 if (var_p)
3344 fprintf (dump_file, "var + ");
3345 if (off_p)
3346 fprintf (dump_file, "cst + ");
3347 if (rat_p)
3348 fprintf (dump_file, "rat * ");
3350 acost = data->costs[sym_p][var_p][off_p][rat_p];
3351 fprintf (dump_file, "index costs %d\n", acost);
3353 if (has_predec[mem_mode] || has_postdec[mem_mode]
3354 || has_preinc[mem_mode] || has_postinc[mem_mode])
3355 fprintf (dump_file, " May include autoinc/dec\n");
3356 fprintf (dump_file, "\n");
3359 VEC_replace (address_cost_data, address_cost_data_list,
3360 data_index, data);
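/* Sign-extend OFFSET from the width of the address mode to a full
   HOST_WIDE_INT; e.g., with a 32-bit address mode the bit pattern
   0xffffffff becomes the signed offset -1.  */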
3363 bits = GET_MODE_BITSIZE (address_mode);
3364 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3365 offset &= mask;
3366 if ((offset >> (bits - 1) & 1))
3367 offset |= ~mask;
3368 s_offset = offset;
3370 autoinc = false;
3371 msize = GET_MODE_SIZE (mem_mode);
3372 autoinc_offset = offset;
3373 if (stmt_after_inc)
3374 autoinc_offset += ratio * cstep;
3375 if (symbol_present || var_present || ratio != 1)
3376 autoinc = false;
3377 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3378 && msize == cstep)
3379 || (has_postdec[mem_mode] && autoinc_offset == 0
3380 && msize == -cstep)
3381 || (has_preinc[mem_mode] && autoinc_offset == msize
3382 && msize == cstep)
3383 || (has_predec[mem_mode] && autoinc_offset == -msize
3384 && msize == -cstep))
3385 autoinc = true;
3387 cost = 0;
3388 offset_p = (s_offset != 0
3389 && data->min_offset <= s_offset
3390 && s_offset <= data->max_offset);
3391 ratio_p = (ratio != 1
3392 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3394 if (ratio != 1 && !ratio_p)
3395 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3397 if (s_offset && !offset_p && !symbol_present)
3398 cost += add_cost (speed, address_mode);
3400 if (may_autoinc)
3401 *may_autoinc = autoinc;
3402 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3403 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3404 return new_cost (cost + acost, complexity);
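/* The precomputed table is indexed as costs[sym][var][off][rat]; e.g.,
   costs[0][1][1][0] is the cost of an address of the form var + const.
   The complexity returned above is just the number of address parts
   present.  */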
3407 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3408 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3409 calculating the operands of EXPR. Returns true if successful, and returns
3410 the cost in COST. */
3412 static bool
3413 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3414 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3416 comp_cost res;
3417 tree op1 = TREE_OPERAND (expr, 1);
3418 tree cst = TREE_OPERAND (mult, 1);
3419 tree multop = TREE_OPERAND (mult, 0);
3420 int m = exact_log2 (int_cst_value (cst));
3421 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3422 int sa_cost;
3424 if (!(m >= 0 && m < maxm))
3425 return false;
3427 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3428 ? shiftadd_cost (speed, mode, m)
3429 : (mult == op1
3430 ? shiftsub1_cost (speed, mode, m)
3431 : shiftsub0_cost (speed, mode, m)));
3432 res = new_cost (sa_cost, 0);
3433 res = add_costs (res, mult == op1 ? cost0 : cost1);
3435 STRIP_NOPS (multop);
3436 if (!is_gimple_val (multop))
3437 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3439 *cost = res;
3440 return true;
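/* E.g., for EXPR a + b * 4 we have m == 2, and the returned cost is the
   target's shift-add cost (on x86 such an expression can often be a
   single lea) plus the cost of the non-multiplied operand.  */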
3443 /* Estimates cost of forcing expression EXPR into a variable. */
3445 static comp_cost
3446 force_expr_to_var_cost (tree expr, bool speed)
3448 static bool costs_initialized = false;
3449 static unsigned integer_cost [2];
3450 static unsigned symbol_cost [2];
3451 static unsigned address_cost [2];
3452 tree op0, op1;
3453 comp_cost cost0, cost1, cost;
3454 enum machine_mode mode;
3456 if (!costs_initialized)
3458 tree type = build_pointer_type (integer_type_node);
3459 tree var, addr;
3460 rtx x;
3461 int i;
3463 var = create_tmp_var_raw (integer_type_node, "test_var");
3464 TREE_STATIC (var) = 1;
3465 x = produce_memory_decl_rtl (var, NULL);
3466 SET_DECL_RTL (var, x);
3468 addr = build1 (ADDR_EXPR, type, var);
3471 for (i = 0; i < 2; i++)
3473 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3474 2000), i);
3476 symbol_cost[i] = computation_cost (addr, i) + 1;
3478 address_cost[i]
3479 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3480 if (dump_file && (dump_flags & TDF_DETAILS))
3482 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3483 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3484 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3485 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3486 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3487 fprintf (dump_file, "\n");
3491 costs_initialized = true;
3494 STRIP_NOPS (expr);
3496 if (SSA_VAR_P (expr))
3497 return no_cost;
3499 if (is_gimple_min_invariant (expr))
3501 if (TREE_CODE (expr) == INTEGER_CST)
3502 return new_cost (integer_cost [speed], 0);
3504 if (TREE_CODE (expr) == ADDR_EXPR)
3506 tree obj = TREE_OPERAND (expr, 0);
3508 if (TREE_CODE (obj) == VAR_DECL
3509 || TREE_CODE (obj) == PARM_DECL
3510 || TREE_CODE (obj) == RESULT_DECL)
3511 return new_cost (symbol_cost [speed], 0);
3514 return new_cost (address_cost [speed], 0);
3517 switch (TREE_CODE (expr))
3519 case POINTER_PLUS_EXPR:
3520 case PLUS_EXPR:
3521 case MINUS_EXPR:
3522 case MULT_EXPR:
3523 op0 = TREE_OPERAND (expr, 0);
3524 op1 = TREE_OPERAND (expr, 1);
3525 STRIP_NOPS (op0);
3526 STRIP_NOPS (op1);
3528 if (is_gimple_val (op0))
3529 cost0 = no_cost;
3530 else
3531 cost0 = force_expr_to_var_cost (op0, speed);
3533 if (is_gimple_val (op1))
3534 cost1 = no_cost;
3535 else
3536 cost1 = force_expr_to_var_cost (op1, speed);
3538 break;
3540 case NEGATE_EXPR:
3541 op0 = TREE_OPERAND (expr, 0);
3542 STRIP_NOPS (op0);
3543 op1 = NULL_TREE;
3545 if (is_gimple_val (op0))
3546 cost0 = no_cost;
3547 else
3548 cost0 = force_expr_to_var_cost (op0, speed);
3550 cost1 = no_cost;
3551 break;
3553 default:
3554 /* Just an arbitrary value, FIXME. */
3555 return new_cost (target_spill_cost[speed], 0);
3558 mode = TYPE_MODE (TREE_TYPE (expr));
3559 switch (TREE_CODE (expr))
3561 case POINTER_PLUS_EXPR:
3562 case PLUS_EXPR:
3563 case MINUS_EXPR:
3564 case NEGATE_EXPR:
3565 cost = new_cost (add_cost (speed, mode), 0);
3566 if (TREE_CODE (expr) != NEGATE_EXPR)
3568 tree mult = NULL_TREE;
3569 comp_cost sa_cost;
3570 if (TREE_CODE (op1) == MULT_EXPR)
3571 mult = op1;
3572 else if (TREE_CODE (op0) == MULT_EXPR)
3573 mult = op0;
3575 if (mult != NULL_TREE
3576 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3577 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3578 speed, &sa_cost))
3579 return sa_cost;
3581 break;
3583 case MULT_EXPR:
3584 if (cst_and_fits_in_hwi (op0))
3585 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3586 mode, speed), 0);
3587 else if (cst_and_fits_in_hwi (op1))
3588 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3589 mode, speed), 0);
3590 else
3591 return new_cost (target_spill_cost [speed], 0);
3592 break;
3594 default:
3595 gcc_unreachable ();
3598 cost = add_costs (cost, cost0);
3599 cost = add_costs (cost, cost1);
3601 /* Bound the cost by target_spill_cost. The parts of complicated
3602 computations often are either loop invariant or at least can
3603 be shared between several iv uses, so letting this grow without
3604 limits would not give reasonable results. */
3605 if (cost.cost > (int) target_spill_cost [speed])
3606 cost.cost = target_spill_cost [speed];
3608 return cost;
3611 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3612 invariants the computation depends on. */
3614 static comp_cost
3615 force_var_cost (struct ivopts_data *data,
3616 tree expr, bitmap *depends_on)
3618 if (depends_on)
3620 fd_ivopts_data = data;
3621 walk_tree (&expr, find_depends, depends_on, NULL);
3624 return force_expr_to_var_cost (expr, data->speed);
3627 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3628 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3629 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3630 invariants the computation depends on. */
3632 static comp_cost
3633 split_address_cost (struct ivopts_data *data,
3634 tree addr, bool *symbol_present, bool *var_present,
3635 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3637 tree core;
3638 HOST_WIDE_INT bitsize;
3639 HOST_WIDE_INT bitpos;
3640 tree toffset;
3641 enum machine_mode mode;
3642 int unsignedp, volatilep;
3644 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3645 &unsignedp, &volatilep, false);
3647 if (toffset != 0
3648 || bitpos % BITS_PER_UNIT != 0
3649 || TREE_CODE (core) != VAR_DECL)
3651 *symbol_present = false;
3652 *var_present = true;
3653 fd_ivopts_data = data;
3654 walk_tree (&addr, find_depends, depends_on, NULL);
3655 return new_cost (target_spill_cost[data->speed], 0);
3658 *offset += bitpos / BITS_PER_UNIT;
3659 if (TREE_STATIC (core)
3660 || DECL_EXTERNAL (core))
3662 *symbol_present = true;
3663 *var_present = false;
3664 return no_cost;
3667 *symbol_present = false;
3668 *var_present = true;
3669 return no_cost;
3672 /* Estimates cost of expressing difference of addresses E1 - E2 as
3673 var + symbol + offset. The value of offset is added to OFFSET,
3674 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3675 part is missing. DEPENDS_ON is a set of the invariants the computation
3676 depends on. */
3678 static comp_cost
3679 ptr_difference_cost (struct ivopts_data *data,
3680 tree e1, tree e2, bool *symbol_present, bool *var_present,
3681 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3683 HOST_WIDE_INT diff = 0;
3684 aff_tree aff_e1, aff_e2;
3685 tree type;
3687 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3689 if (ptr_difference_const (e1, e2, &diff))
3691 *offset += diff;
3692 *symbol_present = false;
3693 *var_present = false;
3694 return no_cost;
3697 if (integer_zerop (e2))
3698 return split_address_cost (data, TREE_OPERAND (e1, 0),
3699 symbol_present, var_present, offset, depends_on);
3701 *symbol_present = false;
3702 *var_present = true;
3704 type = signed_type_for (TREE_TYPE (e1));
3705 tree_to_aff_combination (e1, type, &aff_e1);
3706 tree_to_aff_combination (e2, type, &aff_e2);
3707 aff_combination_scale (&aff_e2, double_int_minus_one);
3708 aff_combination_add (&aff_e1, &aff_e2);
3710 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3713 /* Estimates cost of expressing difference E1 - E2 as
3714 var + symbol + offset. The value of offset is added to OFFSET,
3715 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3716 part is missing. DEPENDS_ON is a set of the invariants the computation
3717 depends on. */
3719 static comp_cost
3720 difference_cost (struct ivopts_data *data,
3721 tree e1, tree e2, bool *symbol_present, bool *var_present,
3722 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3724 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3725 unsigned HOST_WIDE_INT off1, off2;
3726 aff_tree aff_e1, aff_e2;
3727 tree type;
3729 e1 = strip_offset (e1, &off1);
3730 e2 = strip_offset (e2, &off2);
3731 *offset += off1 - off2;
3733 STRIP_NOPS (e1);
3734 STRIP_NOPS (e2);
3736 if (TREE_CODE (e1) == ADDR_EXPR)
3737 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3738 offset, depends_on);
3739 *symbol_present = false;
3741 if (operand_equal_p (e1, e2, 0))
3743 *var_present = false;
3744 return no_cost;
3747 *var_present = true;
3749 if (integer_zerop (e2))
3750 return force_var_cost (data, e1, depends_on);
3752 if (integer_zerop (e1))
3754 comp_cost cost = force_var_cost (data, e2, depends_on);
3755 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3756 return cost;
3759 type = signed_type_for (TREE_TYPE (e1));
3760 tree_to_aff_combination (e1, type, &aff_e1);
3761 tree_to_aff_combination (e2, type, &aff_e2);
3762 aff_combination_scale (&aff_e2, double_int_minus_one);
3763 aff_combination_add (&aff_e1, &aff_e2);
3765 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3768 /* Returns true if AFF1 and AFF2 are identical. */
3770 static bool
3771 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3773 unsigned i;
3775 if (aff1->n != aff2->n)
3776 return false;
3778 for (i = 0; i < aff1->n; i++)
3780 if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
3781 return false;
3783 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3784 return false;
3786 return true;
3789 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3791 static int
3792 get_expr_id (struct ivopts_data *data, tree expr)
3794 struct iv_inv_expr_ent ent;
3795 struct iv_inv_expr_ent **slot;
3797 ent.expr = expr;
3798 ent.hash = iterative_hash_expr (expr, 0);
3799 slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3800 &ent, INSERT);
3801 if (*slot)
3802 return (*slot)->id;
3804 *slot = XNEW (struct iv_inv_expr_ent);
3805 (*slot)->expr = expr;
3806 (*slot)->hash = ent.hash;
3807 (*slot)->id = data->inv_expr_id++;
3808 return (*slot)->id;
3811 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3812 requires a new compiler generated temporary. Returns -1 otherwise.
3813 ADDRESS_P is a flag indicating if the expression is for address
3814 computation. */
3816 static int
3817 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3818 tree cbase, HOST_WIDE_INT ratio,
3819 bool address_p)
3821 aff_tree ubase_aff, cbase_aff;
3822 tree expr, ub, cb;
3824 STRIP_NOPS (ubase);
3825 STRIP_NOPS (cbase);
3826 ub = ubase;
3827 cb = cbase;
3829 if ((TREE_CODE (ubase) == INTEGER_CST)
3830 && (TREE_CODE (cbase) == INTEGER_CST))
3831 return -1;
3833 /* Strips the constant part. */
3834 if (TREE_CODE (ubase) == PLUS_EXPR
3835 || TREE_CODE (ubase) == MINUS_EXPR
3836 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3838 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3839 ubase = TREE_OPERAND (ubase, 0);
3842 /* Strips the constant part. */
3843 if (TREE_CODE (cbase) == PLUS_EXPR
3844 || TREE_CODE (cbase) == MINUS_EXPR
3845 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3847 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3848 cbase = TREE_OPERAND (cbase, 0);
3851 if (address_p)
3853 if (((TREE_CODE (ubase) == SSA_NAME)
3854 || (TREE_CODE (ubase) == ADDR_EXPR
3855 && is_gimple_min_invariant (ubase)))
3856 && (TREE_CODE (cbase) == INTEGER_CST))
3857 return -1;
3859 if (((TREE_CODE (cbase) == SSA_NAME)
3860 || (TREE_CODE (cbase) == ADDR_EXPR
3861 && is_gimple_min_invariant (cbase)))
3862 && (TREE_CODE (ubase) == INTEGER_CST))
3863 return -1;
3866 if (ratio == 1)
3868 if (operand_equal_p (ubase, cbase, 0))
3869 return -1;
3871 if (TREE_CODE (ubase) == ADDR_EXPR
3872 && TREE_CODE (cbase) == ADDR_EXPR)
3874 tree usym, csym;
3876 usym = TREE_OPERAND (ubase, 0);
3877 csym = TREE_OPERAND (cbase, 0);
3878 if (TREE_CODE (usym) == ARRAY_REF)
3880 tree ind = TREE_OPERAND (usym, 1);
3881 if (TREE_CODE (ind) == INTEGER_CST
3882 && host_integerp (ind, 0)
3883 && TREE_INT_CST_LOW (ind) == 0)
3884 usym = TREE_OPERAND (usym, 0);
3886 if (TREE_CODE (csym) == ARRAY_REF)
3888 tree ind = TREE_OPERAND (csym, 1);
3889 if (TREE_CODE (ind) == INTEGER_CST
3890 && host_integerp (ind, 0)
3891 && TREE_INT_CST_LOW (ind) == 0)
3892 csym = TREE_OPERAND (csym, 0);
3894 if (operand_equal_p (usym, csym, 0))
3895 return -1;
3897 /* Now do a more complex comparison. */
3898 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3899 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3900 if (compare_aff_trees (&ubase_aff, &cbase_aff))
3901 return -1;
3904 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3905 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3907 aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
3908 aff_combination_add (&ubase_aff, &cbase_aff);
3909 expr = aff_combination_to_tree (&ubase_aff);
3910 return get_expr_id (data, expr);
3915 /* Determines the cost of the computation by which USE is expressed
3916 from induction variable CAND. If ADDRESS_P is true, we just need
3917 to create an address from it, otherwise we want to get it into a
3918 register. A set of invariants we depend on is stored in
3919 DEPENDS_ON. AT is the statement at which the value is computed.
3920 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3921 addressing is likely. */
3923 static comp_cost
3924 get_computation_cost_at (struct ivopts_data *data,
3925 struct iv_use *use, struct iv_cand *cand,
3926 bool address_p, bitmap *depends_on, gimple at,
3927 bool *can_autoinc,
3928 int *inv_expr_id)
3930 tree ubase = use->iv->base, ustep = use->iv->step;
3931 tree cbase, cstep;
3932 tree utype = TREE_TYPE (ubase), ctype;
3933 unsigned HOST_WIDE_INT cstepi, offset = 0;
3934 HOST_WIDE_INT ratio, aratio;
3935 bool var_present, symbol_present, stmt_is_after_inc;
3936 comp_cost cost;
3937 double_int rat;
3938 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3940 *depends_on = NULL;
3942 /* Only consider real candidates. */
3943 if (!cand->iv)
3944 return infinite_cost;
3946 cbase = cand->iv->base;
3947 cstep = cand->iv->step;
3948 ctype = TREE_TYPE (cbase);
3950 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3952 /* We do not have enough precision to express the values of USE. */
3953 return infinite_cost;
3956 if (address_p
3957 || (use->iv->base_object
3958 && cand->iv->base_object
3959 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
3960 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
3962 /* Do not try to express the address of an object with a computation based
3963 on the address of a different object. This may cause problems in RTL-level
3964 alias analysis (which does not expect this to happen, as it is invalid
3965 in C), and would be unlikely to be useful anyway. */
3967 if (use->iv->base_object
3968 && cand->iv->base_object
3969 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3970 return infinite_cost;
3973 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3975 /* TODO -- add direct handling of this case. */
3976 goto fallback;
3979 /* CSTEPI is removed from the offset in case the statement is after the
3980 increment. If the step is not constant, we use zero instead.
3981 This is a bit imprecise (there is an extra addition), but
3982 redundancy elimination is likely to transform the code so that
3983 it uses the value of the variable before the increment anyway,
3984 so this is not too unrealistic. */
3985 if (cst_and_fits_in_hwi (cstep))
3986 cstepi = int_cst_value (cstep);
3987 else
3988 cstepi = 0;
3990 if (!constant_multiple_of (ustep, cstep, &rat))
3991 return infinite_cost;
3993 if (double_int_fits_in_shwi_p (rat))
3994 ratio = double_int_to_shwi (rat);
3995 else
3996 return infinite_cost;
3998 STRIP_NOPS (cbase);
3999 ctype = TREE_TYPE (cbase);
4001 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4003 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4004 or ratio == 1, it is better to handle this like
4006 ubase - ratio * cbase + ratio * var
4008 (this also holds in the case ratio == -1, TODO). */
4010 if (cst_and_fits_in_hwi (cbase))
4012 offset = - ratio * int_cst_value (cbase);
4013 cost = difference_cost (data,
4014 ubase, build_int_cst (utype, 0),
4015 &symbol_present, &var_present, &offset,
4016 depends_on);
4017 cost.cost /= avg_loop_niter (data->current_loop);
4019 else if (ratio == 1)
4021 tree real_cbase = cbase;
4023 /* Check to see if any adjustment is needed. */
4024 if (cstepi == 0 && stmt_is_after_inc)
4026 aff_tree real_cbase_aff;
4027 aff_tree cstep_aff;
4029 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4030 &real_cbase_aff);
4031 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4033 aff_combination_add (&real_cbase_aff, &cstep_aff);
4034 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4037 cost = difference_cost (data,
4038 ubase, real_cbase,
4039 &symbol_present, &var_present, &offset,
4040 depends_on);
4041 cost.cost /= avg_loop_niter (data->current_loop);
4043 else if (address_p
4044 && !POINTER_TYPE_P (ctype)
4045 && multiplier_allowed_in_address_p
4046 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4047 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4049 cbase
4050 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4051 cost = difference_cost (data,
4052 ubase, cbase,
4053 &symbol_present, &var_present, &offset,
4054 depends_on);
4055 cost.cost /= avg_loop_niter (data->current_loop);
4057 else
4059 cost = force_var_cost (data, cbase, depends_on);
4060 cost = add_costs (cost,
4061 difference_cost (data,
4062 ubase, build_int_cst (utype, 0),
4063 &symbol_present, &var_present,
4064 &offset, depends_on));
4065 cost.cost /= avg_loop_niter (data->current_loop);
4066 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4069 if (inv_expr_id)
4071 *inv_expr_id =
4072 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4073 /* Clear *DEPENDS_ON. */
4074 if (*inv_expr_id != -1 && depends_on && *depends_on)
4075 bitmap_clear (*depends_on);
4078 /* If we are after the increment, the value of the candidate is higher by
4079 one iteration. */
4080 if (stmt_is_after_inc)
4081 offset -= ratio * cstepi;
4083 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4084 (symbol/var1/const parts may be omitted). If we are looking for an
4085 address, find the cost of addressing this. */
4086 if (address_p)
4087 return add_costs (cost,
4088 get_address_cost (symbol_present, var_present,
4089 offset, ratio, cstepi,
4090 TYPE_MODE (TREE_TYPE (utype)),
4091 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4092 speed, stmt_is_after_inc,
4093 can_autoinc));
4095 /* Otherwise estimate the costs for computing the expression. */
4096 if (!symbol_present && !var_present && !offset)
4098 if (ratio != 1)
4099 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4100 return cost;
4103 /* Symbol + offset should be compile-time computable so consider that they
4104 are added once to the variable, if present. */
4105 if (var_present && (symbol_present || offset))
4106 cost.cost += adjust_setup_cost (data,
4107 add_cost (speed, TYPE_MODE (ctype)));
4109 /* Having offset does not affect runtime cost in case it is added to
4110 symbol, but it increases complexity. */
4111 if (offset)
4112 cost.complexity++;
4114 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4116 aratio = ratio > 0 ? ratio : -ratio;
4117 if (aratio != 1)
4118 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4119 return cost;
4121 fallback:
4122 if (can_autoinc)
4123 *can_autoinc = false;
4126 /* Just get the expression, expand it and measure the cost. */
4127 tree comp = get_computation_at (data->current_loop, use, cand, at);
4129 if (!comp)
4130 return infinite_cost;
4132 if (address_p)
4133 comp = build_simple_mem_ref (comp);
4135 return new_cost (computation_cost (comp, speed), 0);
4139 /* Determines the cost of the computation by which USE is expressed
4140 from induction variable CAND. If ADDRESS_P is true, we just need
4141 to create an address from it, otherwise we want to get it into a
4142 register. A set of invariants we depend on is stored in
4143 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4144 autoinc addressing is likely. */
4146 static comp_cost
4147 get_computation_cost (struct ivopts_data *data,
4148 struct iv_use *use, struct iv_cand *cand,
4149 bool address_p, bitmap *depends_on,
4150 bool *can_autoinc, int *inv_expr_id)
4152 return get_computation_cost_at (data,
4153 use, cand, address_p, depends_on, use->stmt,
4154 can_autoinc, inv_expr_id);
4157 /* Determines cost of basing replacement of USE on CAND in a generic
4158 expression. */
4160 static bool
4161 determine_use_iv_cost_generic (struct ivopts_data *data,
4162 struct iv_use *use, struct iv_cand *cand)
4164 bitmap depends_on;
4165 comp_cost cost;
4166 int inv_expr_id = -1;
4168 /* The simple case first -- if we need to express the value of the preserved
4169 original biv, the cost is 0. This also prevents us from counting the
4170 cost of increment twice -- once at this use and once in the cost of
4171 the candidate. */
4172 if (cand->pos == IP_ORIGINAL
4173 && cand->incremented_at == use->stmt)
4175 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4176 ERROR_MARK, -1);
4177 return true;
4180 cost = get_computation_cost (data, use, cand, false, &depends_on,
4181 NULL, &inv_expr_id);
4183 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4184 inv_expr_id);
4186 return !infinite_cost_p (cost);
4189 /* Determines cost of basing replacement of USE on CAND in an address. */
4191 static bool
4192 determine_use_iv_cost_address (struct ivopts_data *data,
4193 struct iv_use *use, struct iv_cand *cand)
4195 bitmap depends_on;
4196 bool can_autoinc;
4197 int inv_expr_id = -1;
4198 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4199 &can_autoinc, &inv_expr_id);
4201 if (cand->ainc_use == use)
4203 if (can_autoinc)
4204 cost.cost -= cand->cost_step;
4205 /* If we generated the candidate solely for exploiting autoincrement
4206 opportunities, and it turns out it can't be used, set the cost to
4207 infinity to make sure we ignore it. */
4208 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4209 cost = infinite_cost;
4211 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4212 inv_expr_id);
4214 return !infinite_cost_p (cost);
4217 /* Computes value of candidate CAND at position AT in iteration NITER, and
4218 stores it to VAL. */
4220 static void
4221 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4222 aff_tree *val)
4224 aff_tree step, delta, nit;
4225 struct iv *iv = cand->iv;
4226 tree type = TREE_TYPE (iv->base);
4227 tree steptype = type;
4228 if (POINTER_TYPE_P (type))
4229 steptype = sizetype;
4231 tree_to_aff_combination (iv->step, steptype, &step);
4232 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4233 aff_combination_convert (&nit, steptype);
4234 aff_combination_mult (&nit, &step, &delta);
4235 if (stmt_after_increment (loop, cand, at))
4236 aff_combination_add (&delta, &step);
4238 tree_to_aff_combination (iv->base, type, val);
4239 aff_combination_add (val, &delta);
4242 /* Returns the period of induction variable IV. */
4244 static tree
4245 iv_period (struct iv *iv)
4247 tree step = iv->step, period, type;
4248 tree pow2div;
4250 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4252 type = unsigned_type_for (TREE_TYPE (step));
4253 /* Period of the iv is lcm (step, type_range)/step - 1,
4254 i.e., N*type_range/step - 1. Since the type range is a power
4255 of two, N == step >> num_of_ending_zeros_binary (step),
4256 so the final result is
4258 (type_range >> num_of_ending_zeros_binary (step)) - 1
4261 pow2div = num_ending_zeros (step);
4263 period = build_low_bits_mask (type,
4264 (TYPE_PRECISION (type)
4265 - tree_low_cst (pow2div, 1)));
4267 return period;
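/* As a worked instance of the formula above: for a 32-bit unsigned iv
   with step 12 (two trailing zero bits in the step), pow2div is 2 and
   the period is (1 << 30) - 1, i.e. the iv cannot return to its
   starting value in fewer than that many increments. */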
4270 /* Returns the comparison operator used when eliminating the iv USE. */
4272 static enum tree_code
4273 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4275 struct loop *loop = data->current_loop;
4276 basic_block ex_bb;
4277 edge exit;
4279 ex_bb = gimple_bb (use->stmt);
4280 exit = EDGE_SUCC (ex_bb, 0);
4281 if (flow_bb_inside_loop_p (loop, exit->dest))
4282 exit = EDGE_SUCC (ex_bb, 1);
4284 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
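/* For instance, when the exit is taken on the true edge of the
   GIMPLE_COND, the replacement test must exit exactly when the iv
   reaches the bound, hence EQ_EXPR; when the loop instead cycles on
   the true edge, the rewritten condition keeps iterating while the iv
   differs from the bound, hence NE_EXPR. */

/* Strips those useless type conversions from EXP that do not change
   whether the type wraps on overflow, so that overflow reasoning about
   the stripped expression also applies to EXP. */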
4287 static tree
4288 strip_wrap_conserving_type_conversions (tree exp)
4290 while (tree_ssa_useless_type_conversion (exp)
4291 && (nowrap_type_p (TREE_TYPE (exp))
4292 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4293 exp = TREE_OPERAND (exp, 0);
4294 return exp;
4297 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic; we
4298 check for an exact match. */
4300 static bool
4301 expr_equal_p (tree e, tree what)
4303 gimple stmt;
4304 enum tree_code code;
4306 e = strip_wrap_conserving_type_conversions (e);
4307 what = strip_wrap_conserving_type_conversions (what);
4309 code = TREE_CODE (what);
4310 if (TREE_TYPE (e) != TREE_TYPE (what))
4311 return false;
4313 if (operand_equal_p (e, what, 0))
4314 return true;
4316 if (TREE_CODE (e) != SSA_NAME)
4317 return false;
4319 stmt = SSA_NAME_DEF_STMT (e);
4320 if (gimple_code (stmt) != GIMPLE_ASSIGN
4321 || gimple_assign_rhs_code (stmt) != code)
4322 return false;
4324 switch (get_gimple_rhs_class (code))
4326 case GIMPLE_BINARY_RHS:
4327 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4328 return false;
4329 /* Fallthru. */
4331 case GIMPLE_UNARY_RHS:
4332 case GIMPLE_SINGLE_RHS:
4333 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4334 default:
4335 return false;
4339 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4340 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4341 calculation is performed in non-wrapping type.
4343 TODO: More generally, we could test for the situation that
4344 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4345 This would require knowing the sign of OFFSET.
4347 Also, we only look for the first addition in the computation of BASE.
4348 More complex analysis would be better, but introducing it just for
4349 this optimization seems like overkill. */
4351 static bool
4352 difference_cannot_overflow_p (tree base, tree offset)
4354 enum tree_code code;
4355 tree e1, e2;
4357 if (!nowrap_type_p (TREE_TYPE (base)))
4358 return false;
4360 base = expand_simple_operations (base);
4362 if (TREE_CODE (base) == SSA_NAME)
4364 gimple stmt = SSA_NAME_DEF_STMT (base);
4366 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4367 return false;
4369 code = gimple_assign_rhs_code (stmt);
4370 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4371 return false;
4373 e1 = gimple_assign_rhs1 (stmt);
4374 e2 = gimple_assign_rhs2 (stmt);
4376 else
4378 code = TREE_CODE (base);
4379 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4380 return false;
4381 e1 = TREE_OPERAND (base, 0);
4382 e2 = TREE_OPERAND (base, 1);
4385 /* TODO: deeper inspection may be necessary to prove the equality. */
4386 switch (code)
4388 case PLUS_EXPR:
4389 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4390 case POINTER_PLUS_EXPR:
4391 return expr_equal_p (e2, offset);
4393 default:
4394 return false;
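/* A minimal sketch of what this catches, with hypothetical SSA names:
   if BASE is defined as base_1 = p_2 + off_3 (a POINTER_PLUS_EXPR in a
   type considered non-wrapping by nowrap_type_p), then
   difference_cannot_overflow_p (base_1, off_3) returns true, since
   base_1 - off_3 is exactly p_2 and the subtraction cannot wrap. */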
4398 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4399 comparison with CAND. NITER describes the number of iterations of
4400 the loops. If successful, the comparison in COMP_P is altered accordingly.
4402 We aim to handle the following situation:
4404 sometype *base, *p;
4405 int a, b, i;
4407 i = a;
4408 p = p_0 = base + a;
4412 bla (*p);
4413 p++;
4414 i++;
4416 while (i < b);
4418 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4419 We aim to optimize this to
4421 p = p_0 = base + a;
4424 bla (*p);
4425 p++;
4427 while (p < p_0 - a + b);
4429 This preserves correctness, since the pointer arithmetic does not
4430 overflow. More precisely:
4432 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4433 overflow in computing it or the values of p.
4434 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4435 overflow. To prove this, we use the fact that p_0 = base + a. */
4437 static bool
4438 iv_elimination_compare_lt (struct ivopts_data *data,
4439 struct iv_cand *cand, enum tree_code *comp_p,
4440 struct tree_niter_desc *niter)
4442 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4443 struct affine_tree_combination nit, tmpa, tmpb;
4444 enum tree_code comp;
4445 HOST_WIDE_INT step;
4447 /* We need to know that the candidate induction variable does not overflow.
4448 While more complex analysis may be used to prove this, for now just
4449 check that the variable appears in the original program and that it
4450 is computed in a type that guarantees no overflows. */
4451 cand_type = TREE_TYPE (cand->iv->base);
4452 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4453 return false;
4455 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4456 the calculation of the BOUND could overflow, making the comparison
4457 invalid. */
4458 if (!data->loop_single_exit_p)
4459 return false;
4461 /* We need to be able to decide whether the candidate is increasing or decreasing
4462 in order to choose the right comparison operator. */
4463 if (!cst_and_fits_in_hwi (cand->iv->step))
4464 return false;
4465 step = int_cst_value (cand->iv->step);
4467 /* Check that the number of iterations matches the expected pattern:
4468 a + 1 > b ? 0 : b - a - 1. */
4469 mbz = niter->may_be_zero;
4470 if (TREE_CODE (mbz) == GT_EXPR)
4472 /* Handle a + 1 > b. */
4473 tree op0 = TREE_OPERAND (mbz, 0);
4474 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4476 a = TREE_OPERAND (op0, 0);
4477 b = TREE_OPERAND (mbz, 1);
4479 else
4480 return false;
4482 else if (TREE_CODE (mbz) == LT_EXPR)
4484 tree op1 = TREE_OPERAND (mbz, 1);
4486 /* Handle b < a + 1. */
4487 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4489 a = TREE_OPERAND (op1, 0);
4490 b = TREE_OPERAND (mbz, 0);
4492 else
4493 return false;
4495 else
4496 return false;
4498 /* Expected number of iterations is B - A - 1. Check that it matches
4499 the actual number, i.e., that B - A - NITER = 1. */
4500 tree_to_aff_combination (niter->niter, nit_type, &nit);
4501 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4502 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4503 aff_combination_scale (&nit, double_int_minus_one);
4504 aff_combination_scale (&tmpa, double_int_minus_one);
4505 aff_combination_add (&tmpb, &tmpa);
4506 aff_combination_add (&tmpb, &nit);
4507 if (tmpb.n != 0 || !double_int_equal_p (tmpb.offset, double_int_one))
4508 return false;
4510 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4511 overflow. */
4512 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4513 cand->iv->step,
4514 fold_convert (TREE_TYPE (cand->iv->step), a));
4515 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4516 return false;
4518 /* Determine the new comparison operator. */
4519 comp = step < 0 ? GT_EXPR : LT_EXPR;
4520 if (*comp_p == NE_EXPR)
4521 *comp_p = comp;
4522 else if (*comp_p == EQ_EXPR)
4523 *comp_p = invert_tree_comparison (comp, false);
4524 else
4525 gcc_unreachable ();
4527 return true;
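/* A concrete check of the affine arithmetic above: with NITER being
   b - a - 1 and MAY_BE_ZERO being a + 1 > b, the code computes
   b + (-a) + (-(b - a - 1)), which folds to the constant 1, so the
   pattern is accepted. */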
4530 /* Check whether it is possible to express the condition in USE by comparison
4531 of candidate CAND. If so, store the value compared with to BOUND, and the
4532 comparison operator to COMP. */
4534 static bool
4535 may_eliminate_iv (struct ivopts_data *data,
4536 struct iv_use *use, struct iv_cand *cand, tree *bound,
4537 enum tree_code *comp)
4539 basic_block ex_bb;
4540 edge exit;
4541 tree period;
4542 struct loop *loop = data->current_loop;
4543 aff_tree bnd;
4544 struct tree_niter_desc *desc = NULL;
4546 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4547 return false;
4549 /* For now this works only for exits that dominate the loop latch.
4550 TODO: extend to other conditions inside loop body. */
4551 ex_bb = gimple_bb (use->stmt);
4552 if (use->stmt != last_stmt (ex_bb)
4553 || gimple_code (use->stmt) != GIMPLE_COND
4554 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4555 return false;
4557 exit = EDGE_SUCC (ex_bb, 0);
4558 if (flow_bb_inside_loop_p (loop, exit->dest))
4559 exit = EDGE_SUCC (ex_bb, 1);
4560 if (flow_bb_inside_loop_p (loop, exit->dest))
4561 return false;
4563 desc = niter_for_exit (data, exit);
4564 if (!desc)
4565 return false;
4567 /* Determine whether we can use the variable to test the exit condition.
4568 This is the case iff the period of the induction variable is greater
4569 than the number of iterations for which the exit condition is true. */
4570 period = iv_period (cand->iv);
4572 /* If the number of iterations is constant, compare against it directly. */
4573 if (TREE_CODE (desc->niter) == INTEGER_CST)
4575 /* See cand_value_at. */
4576 if (stmt_after_increment (loop, cand, use->stmt))
4578 if (!tree_int_cst_lt (desc->niter, period))
4579 return false;
4581 else
4583 if (tree_int_cst_lt (period, desc->niter))
4584 return false;
4588 /* If not, and if this is the only possible exit of the loop, see whether
4589 we can get a conservative estimate on the number of iterations of the
4590 entire loop and compare against that instead. */
4591 else
4593 double_int period_value, max_niter;
4595 max_niter = desc->max;
4596 if (stmt_after_increment (loop, cand, use->stmt))
4597 max_niter = double_int_add (max_niter, double_int_one);
4598 period_value = tree_to_double_int (period);
4599 if (double_int_ucmp (max_niter, period_value) > 0)
4601 /* See if we can take advantage of inferred loop bound information. */
4602 if (data->loop_single_exit_p)
4604 if (!max_loop_iterations (loop, &max_niter))
4605 return false;
4606 /* The loop bound is already adjusted by adding 1. */
4607 if (double_int_ucmp (max_niter, period_value) > 0)
4608 return false;
4610 else
4611 return false;
4615 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4617 *bound = aff_combination_to_tree (&bnd);
4618 *comp = iv_elimination_compare (data, use);
4620 /* It is unlikely that computing the number of iterations using division
4621 would be more profitable than keeping the original induction variable. */
4622 if (expression_expensive_p (*bound))
4623 return false;
4625 /* Sometimes, it is possible to handle the situation that the number of
4626 iterations may be zero unless additional assumptions hold, by using <
4627 instead of != in the exit condition.
4629 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4630 base the exit condition on it. However, that is often too
4631 expensive. */
4632 if (!integer_zerop (desc->may_be_zero))
4633 return iv_elimination_compare_lt (data, cand, comp, desc);
4635 return true;
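/* For example, an 8-bit unsigned candidate with step 1 has period 255,
   so it may replace the exit test only when the exit is known to be
   taken after at most 255 iterations (one fewer if the use follows the
   increment); otherwise the value used in the comparison could already
   have wrapped around. */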
4638 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4639 be copied, if it is used in the loop body and DATA->body_includes_call. */
4641 static int
4642 parm_decl_cost (struct ivopts_data *data, tree bound)
4644 tree sbound = bound;
4645 STRIP_NOPS (sbound);
4647 if (TREE_CODE (sbound) == SSA_NAME
4648 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4649 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4650 && data->body_includes_call)
4651 return COSTS_N_INSNS (1);
4653 return 0;
4656 /* Determines cost of basing replacement of USE on CAND in a condition. */
4658 static bool
4659 determine_use_iv_cost_condition (struct ivopts_data *data,
4660 struct iv_use *use, struct iv_cand *cand)
4662 tree bound = NULL_TREE;
4663 struct iv *cmp_iv;
4664 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4665 comp_cost elim_cost, express_cost, cost, bound_cost;
4666 bool ok;
4667 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4668 tree *control_var, *bound_cst;
4669 enum tree_code comp = ERROR_MARK;
4671 /* Only consider real candidates. */
4672 if (!cand->iv)
4674 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4675 ERROR_MARK, -1);
4676 return false;
4679 /* Try iv elimination. */
4680 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4682 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4683 if (elim_cost.cost == 0)
4684 elim_cost.cost = parm_decl_cost (data, bound);
4685 else if (TREE_CODE (bound) == INTEGER_CST)
4686 elim_cost.cost = 0;
4687 /* If we replace a loop condition 'i < n' with 'p < base + n',
4688 depends_on_elim will have 'base' and 'n' set, which implies
4689 that both 'base' and 'n' will be live during the loop. More likely,
4690 'base + n' will be loop invariant, resulting in only one live value
4691 during the loop. So in that case we clear depends_on_elim and set
4692 elim_inv_expr_id instead. */
4693 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4695 elim_inv_expr_id = get_expr_id (data, bound);
4696 bitmap_clear (depends_on_elim);
4698 /* The bound is a loop invariant, so it will be computed only
4699 once. */
4700 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4702 else
4703 elim_cost = infinite_cost;
4705 /* Try expressing the original giv. If it is compared with an invariant,
4706 note that we cannot get rid of it. */
4707 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4708 NULL, &cmp_iv);
4709 gcc_assert (ok);
4711 /* When the condition is a comparison of the candidate IV against
4712 zero, prefer this IV.
4714 TODO: The constant that we're subtracting from the cost should
4715 be target-dependent. This information should be added to the
4716 target costs for each backend. */
4717 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4718 && integer_zerop (*bound_cst)
4719 && (operand_equal_p (*control_var, cand->var_after, 0)
4720 || operand_equal_p (*control_var, cand->var_before, 0)))
4721 elim_cost.cost -= 1;
4723 express_cost = get_computation_cost (data, use, cand, false,
4724 &depends_on_express, NULL,
4725 &express_inv_expr_id);
4726 fd_ivopts_data = data;
4727 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4729 /* Count the cost of the original bound as well. */
4730 bound_cost = force_var_cost (data, *bound_cst, NULL);
4731 if (bound_cost.cost == 0)
4732 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4733 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4734 bound_cost.cost = 0;
4735 express_cost.cost += bound_cost.cost;
4737 /* Choose the better approach, preferring the eliminated IV. */
4738 if (compare_costs (elim_cost, express_cost) <= 0)
4740 cost = elim_cost;
4741 depends_on = depends_on_elim;
4742 depends_on_elim = NULL;
4743 inv_expr_id = elim_inv_expr_id;
4745 else
4747 cost = express_cost;
4748 depends_on = depends_on_express;
4749 depends_on_express = NULL;
4750 bound = NULL_TREE;
4751 comp = ERROR_MARK;
4752 inv_expr_id = express_inv_expr_id;
4755 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4757 if (depends_on_elim)
4758 BITMAP_FREE (depends_on_elim);
4759 if (depends_on_express)
4760 BITMAP_FREE (depends_on_express);
4762 return !infinite_cost_p (cost);
4765 /* Determines cost of basing replacement of USE on CAND. Returns false
4766 if USE cannot be based on CAND. */
4768 static bool
4769 determine_use_iv_cost (struct ivopts_data *data,
4770 struct iv_use *use, struct iv_cand *cand)
4772 switch (use->type)
4774 case USE_NONLINEAR_EXPR:
4775 return determine_use_iv_cost_generic (data, use, cand);
4777 case USE_ADDRESS:
4778 return determine_use_iv_cost_address (data, use, cand);
4780 case USE_COMPARE:
4781 return determine_use_iv_cost_condition (data, use, cand);
4783 default:
4784 gcc_unreachable ();
4788 /* Return true if get_computation_cost indicates that autoincrement is
4789 a possibility for the pair of USE and CAND, false otherwise. */
4791 static bool
4792 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4793 struct iv_cand *cand)
4795 bitmap depends_on;
4796 bool can_autoinc;
4797 comp_cost cost;
4799 if (use->type != USE_ADDRESS)
4800 return false;
4802 cost = get_computation_cost (data, use, cand, true, &depends_on,
4803 &can_autoinc, NULL);
4805 BITMAP_FREE (depends_on);
4807 return !infinite_cost_p (cost) && can_autoinc;
4810 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4811 use that allows autoincrement, and set their AINC_USE if possible. */
4813 static void
4814 set_autoinc_for_original_candidates (struct ivopts_data *data)
4816 unsigned i, j;
4818 for (i = 0; i < n_iv_cands (data); i++)
4820 struct iv_cand *cand = iv_cand (data, i);
4821 struct iv_use *closest = NULL;
4822 if (cand->pos != IP_ORIGINAL)
4823 continue;
4824 for (j = 0; j < n_iv_uses (data); j++)
4826 struct iv_use *use = iv_use (data, j);
4827 unsigned uid = gimple_uid (use->stmt);
4828 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4829 || uid > gimple_uid (cand->incremented_at))
4830 continue;
4831 if (closest == NULL || uid > gimple_uid (closest->stmt))
4832 closest = use;
4834 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4835 continue;
4836 cand->ainc_use = closest;
4840 /* Finds the candidates for the induction variables. */
4842 static void
4843 find_iv_candidates (struct ivopts_data *data)
4845 /* Add commonly used ivs. */
4846 add_standard_iv_candidates (data);
4848 /* Add old induction variables. */
4849 add_old_ivs_candidates (data);
4851 /* Add induction variables derived from uses. */
4852 add_derived_ivs_candidates (data);
4854 set_autoinc_for_original_candidates (data);
4856 /* Record the important candidates. */
4857 record_important_candidates (data);
4860 /* Determines costs of basing the use of the iv on an iv candidate. */
4862 static void
4863 determine_use_iv_costs (struct ivopts_data *data)
4865 unsigned i, j;
4866 struct iv_use *use;
4867 struct iv_cand *cand;
4868 bitmap to_clear = BITMAP_ALLOC (NULL);
4870 alloc_use_cost_map (data);
4872 for (i = 0; i < n_iv_uses (data); i++)
4874 use = iv_use (data, i);
4876 if (data->consider_all_candidates)
4878 for (j = 0; j < n_iv_cands (data); j++)
4880 cand = iv_cand (data, j);
4881 determine_use_iv_cost (data, use, cand);
4884 else
4886 bitmap_iterator bi;
4888 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4890 cand = iv_cand (data, j);
4891 if (!determine_use_iv_cost (data, use, cand))
4892 bitmap_set_bit (to_clear, j);
4895 /* Remove the candidates for which the cost is infinite from
4896 the list of related candidates. */
4897 bitmap_and_compl_into (use->related_cands, to_clear);
4898 bitmap_clear (to_clear);
4902 BITMAP_FREE (to_clear);
4904 if (dump_file && (dump_flags & TDF_DETAILS))
4906 fprintf (dump_file, "Use-candidate costs:\n");
4908 for (i = 0; i < n_iv_uses (data); i++)
4910 use = iv_use (data, i);
4912 fprintf (dump_file, "Use %d:\n", i);
4913 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4914 for (j = 0; j < use->n_map_members; j++)
4916 if (!use->cost_map[j].cand
4917 || infinite_cost_p (use->cost_map[j].cost))
4918 continue;
4920 fprintf (dump_file, " %d\t%d\t%d\t",
4921 use->cost_map[j].cand->id,
4922 use->cost_map[j].cost.cost,
4923 use->cost_map[j].cost.complexity);
4924 if (use->cost_map[j].depends_on)
4925 bitmap_print (dump_file,
4926 use->cost_map[j].depends_on, "","");
4927 if (use->cost_map[j].inv_expr_id != -1)
4928 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4929 fprintf (dump_file, "\n");
4932 fprintf (dump_file, "\n");
4934 fprintf (dump_file, "\n");
4938 /* Determines cost of the candidate CAND. */
4940 static void
4941 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4943 comp_cost cost_base;
4944 unsigned cost, cost_step;
4945 tree base;
4947 if (!cand->iv)
4949 cand->cost = 0;
4950 return;
4953 /* There are two costs associated with the candidate -- its increment
4954 and its initialization. The second is almost negligible for any loop
4955 that rolls enough, so we give it only a small weight. */
4957 base = cand->iv->base;
4958 cost_base = force_var_cost (data, base, NULL);
4959 /* It is rare for the iv register to happen to be initialized with the
4960 proper value at no cost. In general, there will at least be a regcopy
4961 or a const set. */
4962 if (cost_base.cost == 0)
4963 cost_base.cost = COSTS_N_INSNS (1);
4964 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
4966 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4968 /* Prefer the original ivs unless we may gain something by replacing them.
4969 The reason is to make debugging simpler; so this is not relevant for
4970 artificial ivs created by other optimization passes. */
4971 if (cand->pos != IP_ORIGINAL
4972 || !SSA_NAME_VAR (cand->var_before)
4973 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4974 cost++;
4976 /* Prefer not to insert statements into the latch unless there are some
4977 already (so that we do not create unnecessary jumps). */
4978 if (cand->pos == IP_END
4979 && empty_block_p (ip_end_pos (data->current_loop)))
4980 cost++;
4982 cand->cost = cost;
4983 cand->cost_step = cost_step;
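/* As a rough instance of the above: if an addition in the iv's mode
   costs 1 and forcing the base into a register costs COSTS_N_INSNS (1),
   the candidate's cost is 1 plus that setup cost as scaled down by
   adjust_setup_cost for loops that roll enough, possibly bumped by the
   two preference tweaks above. */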
4986 /* Determines costs of computation of the candidates. */
4988 static void
4989 determine_iv_costs (struct ivopts_data *data)
4991 unsigned i;
4993 if (dump_file && (dump_flags & TDF_DETAILS))
4995 fprintf (dump_file, "Candidate costs:\n");
4996 fprintf (dump_file, " cand\tcost\n");
4999 for (i = 0; i < n_iv_cands (data); i++)
5001 struct iv_cand *cand = iv_cand (data, i);
5003 determine_iv_cost (data, cand);
5005 if (dump_file && (dump_flags & TDF_DETAILS))
5006 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5009 if (dump_file && (dump_flags & TDF_DETAILS))
5010 fprintf (dump_file, "\n");
5013 /* Calculates cost for having SIZE induction variables. */
5015 static unsigned
5016 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5018 /* We add size to the cost, so that we prefer eliminating ivs
5019 if possible. */
5020 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5021 data->body_includes_call);
5024 /* For each size of the induction variable set determine the penalty. */
5026 static void
5027 determine_set_costs (struct ivopts_data *data)
5029 unsigned j, n;
5030 gimple phi;
5031 gimple_stmt_iterator psi;
5032 tree op;
5033 struct loop *loop = data->current_loop;
5034 bitmap_iterator bi;
5036 if (dump_file && (dump_flags & TDF_DETAILS))
5038 fprintf (dump_file, "Global costs:\n");
5039 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5040 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5041 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5042 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5045 n = 0;
5046 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5048 phi = gsi_stmt (psi);
5049 op = PHI_RESULT (phi);
5051 if (virtual_operand_p (op))
5052 continue;
5054 if (get_iv (data, op))
5055 continue;
5057 n++;
5060 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5062 struct version_info *info = ver_info (data, j);
5064 if (info->inv_id && info->has_nonlin_use)
5065 n++;
5068 data->regs_used = n;
5069 if (dump_file && (dump_flags & TDF_DETAILS))
5070 fprintf (dump_file, " regs_used %d\n", n);
5072 if (dump_file && (dump_flags & TDF_DETAILS))
5074 fprintf (dump_file, " cost for size:\n");
5075 fprintf (dump_file, " ivs\tcost\n");
5076 for (j = 0; j <= 2 * target_avail_regs; j++)
5077 fprintf (dump_file, " %d\t%d\n", j,
5078 ivopts_global_cost_for_size (data, j));
5079 fprintf (dump_file, "\n");
5083 /* Returns true if A is a cheaper cost pair than B. */
5085 static bool
5086 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5088 int cmp;
5090 if (!a)
5091 return false;
5093 if (!b)
5094 return true;
5096 cmp = compare_costs (a->cost, b->cost);
5097 if (cmp < 0)
5098 return true;
5100 if (cmp > 0)
5101 return false;
5103 /* In case the costs are the same, prefer the cheaper candidate. */
5104 if (a->cand->cost < b->cand->cost)
5105 return true;
5107 return false;
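/* E.g. if two cost pairs compare equal on their use cost, the pair
   whose candidate has the smaller cand->cost wins the tie; a missing
   (NULL) pair is never considered cheaper. */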
5111 /* Returns the candidate by which USE is expressed in IVS. */
5113 static struct cost_pair *
5114 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5116 return ivs->cand_for_use[use->id];
5119 /* Computes the cost field of IVS structure. */
5121 static void
5122 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5124 comp_cost cost = ivs->cand_use_cost;
5126 cost.cost += ivs->cand_cost;
5128 cost.cost += ivopts_global_cost_for_size (data,
5129 ivs->n_regs + ivs->num_used_inv_expr);
5131 ivs->cost = cost;
5134 /* Remove invariants in set INVS from set IVS. */
5136 static void
5137 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5139 bitmap_iterator bi;
5140 unsigned iid;
5142 if (!invs)
5143 return;
5145 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5147 ivs->n_invariant_uses[iid]--;
5148 if (ivs->n_invariant_uses[iid] == 0)
5149 ivs->n_regs--;
5153 /* Set USE not to be expressed by any candidate in IVS. */
5155 static void
5156 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5157 struct iv_use *use)
5159 unsigned uid = use->id, cid;
5160 struct cost_pair *cp;
5162 cp = ivs->cand_for_use[uid];
5163 if (!cp)
5164 return;
5165 cid = cp->cand->id;
5167 ivs->bad_uses++;
5168 ivs->cand_for_use[uid] = NULL;
5169 ivs->n_cand_uses[cid]--;
5171 if (ivs->n_cand_uses[cid] == 0)
5173 bitmap_clear_bit (ivs->cands, cid);
5174 /* Do not count the pseudocandidates. */
5175 if (cp->cand->iv)
5176 ivs->n_regs--;
5177 ivs->n_cands--;
5178 ivs->cand_cost -= cp->cand->cost;
5180 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5183 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5185 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5187 if (cp->inv_expr_id != -1)
5189 ivs->used_inv_expr[cp->inv_expr_id]--;
5190 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5191 ivs->num_used_inv_expr--;
5193 iv_ca_recount_cost (data, ivs);
5196 /* Add invariants in set INVS to set IVS. */
5198 static void
5199 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5201 bitmap_iterator bi;
5202 unsigned iid;
5204 if (!invs)
5205 return;
5207 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5209 ivs->n_invariant_uses[iid]++;
5210 if (ivs->n_invariant_uses[iid] == 1)
5211 ivs->n_regs++;
5215 /* Set cost pair for USE in set IVS to CP. */
5217 static void
5218 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5219 struct iv_use *use, struct cost_pair *cp)
5221 unsigned uid = use->id, cid;
5223 if (ivs->cand_for_use[uid] == cp)
5224 return;
5226 if (ivs->cand_for_use[uid])
5227 iv_ca_set_no_cp (data, ivs, use);
5229 if (cp)
5231 cid = cp->cand->id;
5233 ivs->bad_uses--;
5234 ivs->cand_for_use[uid] = cp;
5235 ivs->n_cand_uses[cid]++;
5236 if (ivs->n_cand_uses[cid] == 1)
5238 bitmap_set_bit (ivs->cands, cid);
5239 /* Do not count the pseudocandidates. */
5240 if (cp->cand->iv)
5241 ivs->n_regs++;
5242 ivs->n_cands++;
5243 ivs->cand_cost += cp->cand->cost;
5245 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5248 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5249 iv_ca_set_add_invariants (ivs, cp->depends_on);
5251 if (cp->inv_expr_id != -1)
5253 ivs->used_inv_expr[cp->inv_expr_id]++;
5254 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5255 ivs->num_used_inv_expr++;
5257 iv_ca_recount_cost (data, ivs);
5261 /* Extend set IVS by expressing USE by some of the candidates in it
5262 if possible. All important candidates will be considered
5263 if IMPORTANT_CANDIDATES is true. */
5265 static void
5266 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5267 struct iv_use *use, bool important_candidates)
5269 struct cost_pair *best_cp = NULL, *cp;
5270 bitmap_iterator bi;
5271 bitmap cands;
5272 unsigned i;
5274 gcc_assert (ivs->upto >= use->id);
5276 if (ivs->upto == use->id)
5278 ivs->upto++;
5279 ivs->bad_uses++;
5282 cands = (important_candidates ? data->important_candidates : ivs->cands);
5283 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5285 struct iv_cand *cand = iv_cand (data, i);
5287 cp = get_use_iv_cost (data, use, cand);
5289 if (cheaper_cost_pair (cp, best_cp))
5290 best_cp = cp;
5293 iv_ca_set_cp (data, ivs, use, best_cp);
5296 /* Get cost for assignment IVS. */
5298 static comp_cost
5299 iv_ca_cost (struct iv_ca *ivs)
5301 /* This was a conditional expression but it triggered a bug in
5302 Sun C 5.5. */
5303 if (ivs->bad_uses)
5304 return infinite_cost;
5305 else
5306 return ivs->cost;
5309 /* Returns true if all dependences of CP are among invariants in IVS. */
5311 static bool
5312 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5314 unsigned i;
5315 bitmap_iterator bi;
5317 if (!cp->depends_on)
5318 return true;
5320 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5322 if (ivs->n_invariant_uses[i] == 0)
5323 return false;
5326 return true;
5329 /* Creates a change expressing USE by NEW_CP instead of OLD_CP and chains
5330 it before NEXT_CHANGE. */
5332 static struct iv_ca_delta *
5333 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5334 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5336 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5338 change->use = use;
5339 change->old_cp = old_cp;
5340 change->new_cp = new_cp;
5341 change->next_change = next_change;
5343 return change;
5346 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5347 are rewritten. */
5349 static struct iv_ca_delta *
5350 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5352 struct iv_ca_delta *last;
5354 if (!l2)
5355 return l1;
5357 if (!l1)
5358 return l2;
5360 for (last = l1; last->next_change; last = last->next_change)
5361 continue;
5362 last->next_change = l2;
5364 return l1;
5367 /* Reverse the list of changes DELTA, forming the inverse to it. */
5369 static struct iv_ca_delta *
5370 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5372 struct iv_ca_delta *act, *next, *prev = NULL;
5373 struct cost_pair *tmp;
5375 for (act = delta; act; act = next)
5377 next = act->next_change;
5378 act->next_change = prev;
5379 prev = act;
5381 tmp = act->old_cp;
5382 act->old_cp = act->new_cp;
5383 act->new_cp = tmp;
5386 return prev;
5389 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5390 reverted instead. */
5392 static void
5393 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5394 struct iv_ca_delta *delta, bool forward)
5396 struct cost_pair *from, *to;
5397 struct iv_ca_delta *act;
5399 if (!forward)
5400 delta = iv_ca_delta_reverse (delta);
5402 for (act = delta; act; act = act->next_change)
5404 from = act->old_cp;
5405 to = act->new_cp;
5406 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5407 iv_ca_set_cp (data, ivs, act->use, to);
5410 if (!forward)
5411 iv_ca_delta_reverse (delta);
5414 /* Returns true if CAND is used in IVS. */
5416 static bool
5417 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5419 return ivs->n_cand_uses[cand->id] > 0;
5422 /* Returns the number of induction variable candidates in the set IVS. */
5424 static unsigned
5425 iv_ca_n_cands (struct iv_ca *ivs)
5427 return ivs->n_cands;
5430 /* Free the list of changes DELTA. */
5432 static void
5433 iv_ca_delta_free (struct iv_ca_delta **delta)
5435 struct iv_ca_delta *act, *next;
5437 for (act = *delta; act; act = next)
5439 next = act->next_change;
5440 free (act);
5443 *delta = NULL;
5446 /* Allocates new iv candidates assignment. */
5448 static struct iv_ca *
5449 iv_ca_new (struct ivopts_data *data)
5451 struct iv_ca *nw = XNEW (struct iv_ca);
5453 nw->upto = 0;
5454 nw->bad_uses = 0;
5455 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5456 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5457 nw->cands = BITMAP_ALLOC (NULL);
5458 nw->n_cands = 0;
5459 nw->n_regs = 0;
5460 nw->cand_use_cost = no_cost;
5461 nw->cand_cost = 0;
5462 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5463 nw->cost = no_cost;
5464 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5465 nw->num_used_inv_expr = 0;
5467 return nw;
5470 /* Free memory occupied by the set IVS. */
5472 static void
5473 iv_ca_free (struct iv_ca **ivs)
5475 free ((*ivs)->cand_for_use);
5476 free ((*ivs)->n_cand_uses);
5477 BITMAP_FREE ((*ivs)->cands);
5478 free ((*ivs)->n_invariant_uses);
5479 free ((*ivs)->used_inv_expr);
5480 free (*ivs);
5481 *ivs = NULL;
5484 /* Dumps IVS to FILE. */
5486 static void
5487 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5489 const char *pref = " invariants ";
5490 unsigned i;
5491 comp_cost cost = iv_ca_cost (ivs);
5493 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5494 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5495 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5496 bitmap_print (file, ivs->cands, " candidates: ","\n");
5498 for (i = 0; i < ivs->upto; i++)
5500 struct iv_use *use = iv_use (data, i);
5501 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5502 if (cp)
5503 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5504 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5505 else
5506 fprintf (file, " use:%d --> ??\n", use->id);
5509 for (i = 1; i <= data->max_inv_id; i++)
5510 if (ivs->n_invariant_uses[i])
5512 fprintf (file, "%s%d", pref, i);
5513 pref = ", ";
5515 fprintf (file, "\n\n");
5518 /* Try changing the candidate in IVS to CAND for each use. Return the cost
5519 of the new set, and store the differences in DELTA. The number of induction
5520 variables in the new set is stored to N_IVS. MIN_NCAND is a flag; when it
5521 is true, the function tries to find a solution with a minimal number of ivs.
5523 static comp_cost
5524 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5525 struct iv_cand *cand, struct iv_ca_delta **delta,
5526 unsigned *n_ivs, bool min_ncand)
5528 unsigned i;
5529 comp_cost cost;
5530 struct iv_use *use;
5531 struct cost_pair *old_cp, *new_cp;
5533 *delta = NULL;
5534 for (i = 0; i < ivs->upto; i++)
5536 use = iv_use (data, i);
5537 old_cp = iv_ca_cand_for_use (ivs, use);
5539 if (old_cp
5540 && old_cp->cand == cand)
5541 continue;
5543 new_cp = get_use_iv_cost (data, use, cand);
5544 if (!new_cp)
5545 continue;
5547 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5548 continue;
5550 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5551 continue;
5553 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5556 iv_ca_delta_commit (data, ivs, *delta, true);
5557 cost = iv_ca_cost (ivs);
5558 if (n_ivs)
5559 *n_ivs = iv_ca_n_cands (ivs);
5560 iv_ca_delta_commit (data, ivs, *delta, false);
5562 return cost;
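/* Note the try-and-roll-back pattern above: the delta is committed
   just long enough to read off the cost (and candidate count) of the
   tentative set, then reverted, so IVS itself is left unchanged for
   the caller to decide on. */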
5565 /* Try narrowing set IVS by removing CAND. Return the cost of
5566 the new set and store the differences in DELTA. */
5568 static comp_cost
5569 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5570 struct iv_cand *cand, struct iv_ca_delta **delta)
5572 unsigned i, ci;
5573 struct iv_use *use;
5574 struct cost_pair *old_cp, *new_cp, *cp;
5575 bitmap_iterator bi;
5576 struct iv_cand *cnd;
5577 comp_cost cost;
5579 *delta = NULL;
5580 for (i = 0; i < n_iv_uses (data); i++)
5582 use = iv_use (data, i);
5584 old_cp = iv_ca_cand_for_use (ivs, use);
5585 if (old_cp->cand != cand)
5586 continue;
5588 new_cp = NULL;
5590 if (data->consider_all_candidates)
5592 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5594 if (ci == cand->id)
5595 continue;
5597 cnd = iv_cand (data, ci);
5599 cp = get_use_iv_cost (data, use, cnd);
5600 if (!cp)
5601 continue;
5603 if (!iv_ca_has_deps (ivs, cp))
5604 continue;
5606 if (!cheaper_cost_pair (cp, new_cp))
5607 continue;
5609 new_cp = cp;
5612 else
5614 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5616 if (ci == cand->id)
5617 continue;
5619 cnd = iv_cand (data, ci);
5621 cp = get_use_iv_cost (data, use, cnd);
5622 if (!cp)
5623 continue;
5624 if (!iv_ca_has_deps (ivs, cp))
5625 continue;
5627 if (!cheaper_cost_pair (cp, new_cp))
5628 continue;
5630 new_cp = cp;
5634 if (!new_cp)
5636 iv_ca_delta_free (delta);
5637 return infinite_cost;
5640 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5643 iv_ca_delta_commit (data, ivs, *delta, true);
5644 cost = iv_ca_cost (ivs);
5645 iv_ca_delta_commit (data, ivs, *delta, false);
5647 return cost;
5650 /* Try optimizing the set of candidates IVS by removing candidates different
5651 from EXCEPT_CAND from it. Return the cost of the new set, and store
5652 differences in DELTA. */
5654 static comp_cost
5655 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5656 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5658 bitmap_iterator bi;
5659 struct iv_ca_delta *act_delta, *best_delta;
5660 unsigned i;
5661 comp_cost best_cost, acost;
5662 struct iv_cand *cand;
5664 best_delta = NULL;
5665 best_cost = iv_ca_cost (ivs);
5667 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5669 cand = iv_cand (data, i);
5671 if (cand == except_cand)
5672 continue;
5674 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5676 if (compare_costs (acost, best_cost) < 0)
5678 best_cost = acost;
5679 iv_ca_delta_free (&best_delta);
5680 best_delta = act_delta;
5682 else
5683 iv_ca_delta_free (&act_delta);
5686 if (!best_delta)
5688 *delta = NULL;
5689 return best_cost;
5692 /* Recurse to possibly remove other unnecessary ivs. */
5693 iv_ca_delta_commit (data, ivs, best_delta, true);
5694 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5695 iv_ca_delta_commit (data, ivs, best_delta, false);
5696 *delta = iv_ca_delta_join (best_delta, *delta);
5697 return best_cost;
5700 /* Tries to extend the set IVS in the best possible way in order
5701 to express USE. If ORIGINALP is true, prefer candidates from
5702 the original set of IVs, otherwise favor important candidates not
5703 based on any memory object. */
5705 static bool
5706 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5707 struct iv_use *use, bool originalp)
5709 comp_cost best_cost, act_cost;
5710 unsigned i;
5711 bitmap_iterator bi;
5712 struct iv_cand *cand;
5713 struct iv_ca_delta *best_delta = NULL, *act_delta;
5714 struct cost_pair *cp;
5716 iv_ca_add_use (data, ivs, use, false);
5717 best_cost = iv_ca_cost (ivs);
5719 cp = iv_ca_cand_for_use (ivs, use);
5720 if (!cp)
5722 ivs->upto--;
5723 ivs->bad_uses--;
5724 iv_ca_add_use (data, ivs, use, true);
5725 best_cost = iv_ca_cost (ivs);
5726 cp = iv_ca_cand_for_use (ivs, use);
5728 if (cp)
5730 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5731 iv_ca_set_no_cp (data, ivs, use);
5734 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5735 first try important candidates not based on any memory object. Only if
5736 this fails, try the specific ones. Rationale -- in loops with many
5737 variables the best choice often is to use just one generic biv. If we
5738 added here many ivs specific to the uses, the optimization algorithm later
5739 would be likely to get stuck in a local minimum, thus causing us to create
5740 too many ivs. The approach from few ivs to more seems more likely to be
5741 successful -- starting from few ivs, replacing an expensive use by a
5742 specific iv should always be a win. */
5743 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5745 cand = iv_cand (data, i);
5747 if (originalp && cand->pos != IP_ORIGINAL)
5748 continue;
5750 if (!originalp && cand->iv->base_object != NULL_TREE)
5751 continue;
5753 if (iv_ca_cand_used_p (ivs, cand))
5754 continue;
5756 cp = get_use_iv_cost (data, use, cand);
5757 if (!cp)
5758 continue;
5760 iv_ca_set_cp (data, ivs, use, cp);
5761 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5762 true);
5763 iv_ca_set_no_cp (data, ivs, use);
5764 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5766 if (compare_costs (act_cost, best_cost) < 0)
5768 best_cost = act_cost;
5770 iv_ca_delta_free (&best_delta);
5771 best_delta = act_delta;
5773 else
5774 iv_ca_delta_free (&act_delta);
5777 if (infinite_cost_p (best_cost))
5779 for (i = 0; i < use->n_map_members; i++)
5781 cp = use->cost_map + i;
5782 cand = cp->cand;
5783 if (!cand)
5784 continue;
5786 /* Already tried this. */
5787 if (cand->important)
5789 if (originalp && cand->pos == IP_ORIGINAL)
5790 continue;
5791 if (!originalp && cand->iv->base_object == NULL_TREE)
5792 continue;
5795 if (iv_ca_cand_used_p (ivs, cand))
5796 continue;
5798 act_delta = NULL;
5799 iv_ca_set_cp (data, ivs, use, cp);
5800 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5801 iv_ca_set_no_cp (data, ivs, use);
5802 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5803 cp, act_delta);
5805 if (compare_costs (act_cost, best_cost) < 0)
5807 best_cost = act_cost;
5809 if (best_delta)
5810 iv_ca_delta_free (&best_delta);
5811 best_delta = act_delta;
5813 else
5814 iv_ca_delta_free (&act_delta);
5818 iv_ca_delta_commit (data, ivs, best_delta, true);
5819 iv_ca_delta_free (&best_delta);
5821 return !infinite_cost_p (best_cost);
5824 /* Finds an initial assignment of candidates to uses. */
5826 static struct iv_ca *
5827 get_initial_solution (struct ivopts_data *data, bool originalp)
5829 struct iv_ca *ivs = iv_ca_new (data);
5830 unsigned i;
5832 for (i = 0; i < n_iv_uses (data); i++)
5833 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5835 iv_ca_free (&ivs);
5836 return NULL;
5839 return ivs;
5842 /* Tries to improve the set of induction variables IVS. */
5844 static bool
5845 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5847 unsigned i, n_ivs;
5848 comp_cost acost, best_cost = iv_ca_cost (ivs);
5849 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5850 struct iv_cand *cand;
5852 /* Try extending the set of induction variables by one. */
5853 for (i = 0; i < n_iv_cands (data); i++)
5855 cand = iv_cand (data, i);
5857 if (iv_ca_cand_used_p (ivs, cand))
5858 continue;
5860 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5861 if (!act_delta)
5862 continue;
5864 /* If we successfully added the candidate and the set is small enough,
5865 try optimizing it by removing other candidates. */
5866 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5868 iv_ca_delta_commit (data, ivs, act_delta, true);
5869 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5870 iv_ca_delta_commit (data, ivs, act_delta, false);
5871 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5874 if (compare_costs (acost, best_cost) < 0)
5876 best_cost = acost;
5877 iv_ca_delta_free (&best_delta);
5878 best_delta = act_delta;
5880 else
5881 iv_ca_delta_free (&act_delta);
5884 if (!best_delta)
5886 /* Try removing the candidates from the set instead. */
5887 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5889 /* Nothing more we can do. */
5890 if (!best_delta)
5891 return false;
5894 iv_ca_delta_commit (data, ivs, best_delta, true);
5895 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5896 iv_ca_delta_free (&best_delta);
5897 return true;
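/* A sketch of one improvement step, with hypothetical candidates:
   starting from {cand_1}, the code above tentatively extends to
   {cand_1, cand_5}, prunes that back to {cand_5} if doing so is
   cheaper, and commits the joined delta only if the total cost
   dropped; the caller iterates until no such step helps. */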
5900 /* Attempts to find the optimal set of induction variables. We use a simple
5901 greedy heuristic -- we try to replace at most one candidate in the selected
5902 solution and remove the unused ivs while this improves the cost. */
5904 static struct iv_ca *
5905 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5907 struct iv_ca *set;
5909 /* Get the initial solution. */
5910 set = get_initial_solution (data, originalp);
5911 if (!set)
5913 if (dump_file && (dump_flags & TDF_DETAILS))
5914 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5915 return NULL;
5918 if (dump_file && (dump_flags & TDF_DETAILS))
5920 fprintf (dump_file, "Initial set of candidates:\n");
5921 iv_ca_dump (data, dump_file, set);
5924 while (try_improve_iv_set (data, set))
5926 if (dump_file && (dump_flags & TDF_DETAILS))
5928 fprintf (dump_file, "Improved to:\n");
5929 iv_ca_dump (data, dump_file, set);
5933 return set;
5936 static struct iv_ca *
5937 find_optimal_iv_set (struct ivopts_data *data)
5939 unsigned i;
5940 struct iv_ca *set, *origset;
5941 struct iv_use *use;
5942 comp_cost cost, origcost;
5944 /* Determine the cost based on a strategy that starts with original IVs,
5945 and try again using a strategy that prefers candidates not based
5946 on any memory object. */
5947 origset = find_optimal_iv_set_1 (data, true);
5948 set = find_optimal_iv_set_1 (data, false);
5950 if (!origset && !set)
5951 return NULL;
5953 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5954 cost = set ? iv_ca_cost (set) : infinite_cost;
5956 if (dump_file && (dump_flags & TDF_DETAILS))
5958 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5959 origcost.cost, origcost.complexity);
5960 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5961 cost.cost, cost.complexity);
5964 /* Choose the one with the best cost. */
5965 if (compare_costs (origcost, cost) <= 0)
5967 if (set)
5968 iv_ca_free (&set);
5969 set = origset;
5971 else if (origset)
5972 iv_ca_free (&origset);
5974 for (i = 0; i < n_iv_uses (data); i++)
5976 use = iv_use (data, i);
5977 use->selected = iv_ca_cand_for_use (set, use)->cand;
5980 return set;
5983 /* Creates a new induction variable corresponding to CAND. */
5985 static void
5986 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5988 gimple_stmt_iterator incr_pos;
5989 tree base;
5990 bool after = false;
5992 if (!cand->iv)
5993 return;
5995 switch (cand->pos)
5997 case IP_NORMAL:
5998 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5999 break;
6001 case IP_END:
6002 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6003 after = true;
6004 break;
6006 case IP_AFTER_USE:
6007 after = true;
6008 /* fall through */
6009 case IP_BEFORE_USE:
6010 incr_pos = gsi_for_stmt (cand->incremented_at);
6011 break;
6013 case IP_ORIGINAL:
6014 /* Mark that the iv is preserved. */
6015 name_info (data, cand->var_before)->preserve_biv = true;
6016 name_info (data, cand->var_after)->preserve_biv = true;
6018 /* Rewrite the increment so that it uses var_before directly. */
6019 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6020 return;
6023 gimple_add_tmp_var (cand->var_before);
6025 base = unshare_expr (cand->iv->base);
6027 create_iv (base, unshare_expr (cand->iv->step),
6028 cand->var_before, data->current_loop,
6029 &incr_pos, after, &cand->var_before, &cand->var_after);
6032 /* Creates new induction variables described in SET. */
6034 static void
6035 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6037 unsigned i;
6038 struct iv_cand *cand;
6039 bitmap_iterator bi;
6041 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6043 cand = iv_cand (data, i);
6044 create_new_iv (data, cand);
6047 if (dump_file && (dump_flags & TDF_DETAILS))
6049 fprintf (dump_file, "\nSelected IV set: \n");
6050 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6052 cand = iv_cand (data, i);
6053 dump_cand (dump_file, cand);
6055 fprintf (dump_file, "\n");
6059 /* Rewrites USE (definition of iv used in a nonlinear expression)
6060 using candidate CAND. */
6062 static void
6063 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6064 struct iv_use *use, struct iv_cand *cand)
6066 tree comp;
6067 tree op, tgt;
6068 gimple ass;
6069 gimple_stmt_iterator bsi;
6071 /* An important special case -- if we are asked to express value of
6072 the original iv by itself, just exit; there is no need to
6073 introduce a new computation (that might also need casting the
6074 variable to unsigned and back). */
6075 if (cand->pos == IP_ORIGINAL
6076 && cand->incremented_at == use->stmt)
6078 tree step, ctype, utype;
6079 enum tree_code incr_code = PLUS_EXPR, old_code;
6081 gcc_assert (is_gimple_assign (use->stmt));
6082 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6084 step = cand->iv->step;
6085 ctype = TREE_TYPE (step);
6086 utype = TREE_TYPE (cand->var_after);
6087 if (TREE_CODE (step) == NEGATE_EXPR)
6089 incr_code = MINUS_EXPR;
6090 step = TREE_OPERAND (step, 0);
6093 /* Check whether we may leave the computation unchanged.
6094 This is the case only if it does not rely on other
6095 computations in the loop -- otherwise, the computation
6096 we rely upon may be removed in remove_unused_ivs,
6097 thus leading to an ICE. */
6098 old_code = gimple_assign_rhs_code (use->stmt);
6099 if (old_code == PLUS_EXPR
6100 || old_code == MINUS_EXPR
6101 || old_code == POINTER_PLUS_EXPR)
6103 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6104 op = gimple_assign_rhs2 (use->stmt);
6105 else if (old_code != MINUS_EXPR
6106 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
6107 op = gimple_assign_rhs1 (use->stmt);
6108 else
6109 op = NULL_TREE;
6111 else
6112 op = NULL_TREE;
6114 if (op
6115 && (TREE_CODE (op) == INTEGER_CST
6116 || operand_equal_p (op, step, 0)))
6117 return;
6119 /* Otherwise, add the necessary computations to express
6120 the iv. */
6121 op = fold_convert (ctype, cand->var_before);
6122 comp = fold_convert (utype,
6123 build2 (incr_code, ctype, op,
6124 unshare_expr (step)));
6126 else
6128 comp = get_computation (data->current_loop, use, cand);
6129 gcc_assert (comp != NULL_TREE);
6132 switch (gimple_code (use->stmt))
6134 case GIMPLE_PHI:
6135 tgt = PHI_RESULT (use->stmt);
6137 /* If we should keep the biv, do not replace it. */
6138 if (name_info (data, tgt)->preserve_biv)
6139 return;
6141 bsi = gsi_after_labels (gimple_bb (use->stmt));
6142 break;
6144 case GIMPLE_ASSIGN:
6145 tgt = gimple_assign_lhs (use->stmt);
6146 bsi = gsi_for_stmt (use->stmt);
6147 break;
6149 default:
6150 gcc_unreachable ();
6153 if (!valid_gimple_rhs_p (comp)
6154 || (gimple_code (use->stmt) != GIMPLE_PHI
6155 /* We can't allow re-allocating the stmt as it might still be
6156 pointed to. */
6157 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6158 >= gimple_num_ops (gsi_stmt (bsi)))))
6160 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6161 true, GSI_SAME_STMT);
6162 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6164 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6165 /* As this isn't a plain copy we have to reset alignment
6166 information. */
6167 if (SSA_NAME_PTR_INFO (comp))
6168 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6172 if (gimple_code (use->stmt) == GIMPLE_PHI)
6174 ass = gimple_build_assign (tgt, comp);
6175 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6177 bsi = gsi_for_stmt (use->stmt);
6178 remove_phi_node (&bsi, false);
6180 else
6182 gimple_assign_set_rhs_from_tree (&bsi, comp);
6183 use->stmt = gsi_stmt (bsi);
6187 /* Performs a peephole optimization to reorder the iv update statement with
6188 a mem ref to enable instruction combining in later phases. The mem ref uses
6189 the iv value before the update, so the reordering transformation requires
6190 adjustment of the offset. CAND is the selected IV_CAND.
6192 Example:
6194 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6195 iv2 = iv1 + 1;
6197 if (t < val) (1)
6198 goto L;
6199 goto Head;
6202 directly propagating t over to (1) would introduce an overlapping live range
6203 and thus increase register pressure. This peephole transforms it into:
6206 iv2 = iv1 + 1;
6207 t = MEM_REF (base, iv2, 8, 8);
6208 if (t < val)
6209 goto L;
6210 goto Head;
6213 static void
6214 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6216 tree var_after;
6217 gimple iv_update, stmt;
6218 basic_block bb;
6219 gimple_stmt_iterator gsi, gsi_iv;
6221 if (cand->pos != IP_NORMAL)
6222 return;
6224 var_after = cand->var_after;
6225 iv_update = SSA_NAME_DEF_STMT (var_after);
6227 bb = gimple_bb (iv_update);
6228 gsi = gsi_last_nondebug_bb (bb);
6229 stmt = gsi_stmt (gsi);
6231 /* Only handle conditional statements for now. */
6232 if (gimple_code (stmt) != GIMPLE_COND)
6233 return;
6235 gsi_prev_nondebug (&gsi);
6236 stmt = gsi_stmt (gsi);
6237 if (stmt != iv_update)
6238 return;
6240 gsi_prev_nondebug (&gsi);
6241 if (gsi_end_p (gsi))
6242 return;
6244 stmt = gsi_stmt (gsi);
6245 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6246 return;
6248 if (stmt != use->stmt)
6249 return;
6251 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6252 return;
6254 if (dump_file && (dump_flags & TDF_DETAILS))
6256 fprintf (dump_file, "Reordering \n");
6257 print_gimple_stmt (dump_file, iv_update, 0, 0);
6258 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6259 fprintf (dump_file, "\n");
6262 gsi = gsi_for_stmt (use->stmt);
6263 gsi_iv = gsi_for_stmt (iv_update);
6264 gsi_move_before (&gsi_iv, &gsi);
6266 cand->pos = IP_BEFORE_USE;
6267 cand->incremented_at = use->stmt;
/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
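  /* An illustrative sketch (hypothetical SSA names, not from a testcase):
     for a use a[i] with a pointer candidate p_7 stepping through a, the
     affine form might be "(sizetype) p_7 + 0"; passing p_7 as the hint
     lets create_mem_ref build MEM[(int *) p_7] instead of treating the
     whole sum as an offset from a null base.  */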
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
                        reference_alias_ptr_type (*use->op_p),
                        iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

/* Rewrites USE (a condition in which one of the operands is an iv) using
   candidate CAND.  */
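/* An illustrative sketch (hypothetical SSA names, not from a testcase):
   when the chosen cost pair carries a precomputed bound, an exit test such
   as

     if (i_1 < n_2)

   is replaced by a comparison of the candidate against that bound, e.g.

     if (ivtmp_3 != bound_4)

   Otherwise the iv operand of the comparison is simply re-expressed in
   terms of CAND.  */
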
static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}

/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
    }
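  /* release_defs_bitset picks a safe release order for the names collected
     above (see the comment before the loop), so a definition is not freed
     while its value would still have to be propagated into the debug stmts
     of another name being removed.  */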
  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}

/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
                      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);
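  /* New SSA names may have been created while this loop was optimized;
     grow the per-version array (with headroom) so that the versions of
     the next loop fit without reallocating every time.  */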
  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);

  htab_empty (data->inv_expr_tab);
  data->inv_expr_id = 0;
}

/* Finalizes the data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
  htab_delete (data->inv_expr_tab);
}

/* Returns true if the loop body BODY (with NUM_NODES blocks) includes any
   function calls other than inexpensive builtins.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
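  /* If there are very many uses, the search for an optimal candidate set
     below would become too expensive, so such loops are left alone.  */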
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}