/* gcc/tree-ssa-loop-ivopts.c */
/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
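/* As a concrete illustration (a sketch, independent of any particular
   target), the pass can transform

     for (i = 0; i < n; i++)
       a[i] = 0;

   where computing &a[i] costs a multiply-and-add every iteration, into
   the equivalent of

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   strength-reducing the address computation to a pointer iv and
   eliminating "i" when it has no remaining uses.  */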
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "output.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"

/* FIXME: add_cost and zero_cost defined in expmed.h conflict with local
   uses.  */
#include "expmed.h"
#undef add_cost
#undef zero_cost

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
/* The infinite cost.  */
#define INFTY 10000000

/* The average number of iterations assumed for a loop whose trip count
   cannot be estimated.  */
#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = max_stmt_executions_int (loop, false);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
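/* For illustration, a sketch of how two such costs are typically
   compared -- runtime cost first, complexity only as a tie-breaker
   (the pass's own comparison helper follows the same idea):

     static int
     compare_costs_sketch (comp_cost c1, comp_cost c2)
     {
       if (c1.cost == c2.cost)
	 return c1.complexity - c2.complexity;
       return c1.cost - c2.cost;
     }  */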
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};
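/* For example (a sketch), in a loop such as

     while (i < n)
       {
	 sum += a[i];
	 i++;
       }

   "a[i]" gives rise to a USE_ADDRESS use for &a[i], "i < n" to a
   USE_COMPARE use, and an appearance of "i" in an expression like
   "sum += i" would be a USE_NONLINEAR_EXPR use.  */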
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
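/* All three bounds are --param knobs; assuming the usual spellings of
   the parameters, they can be tuned from the command line, e.g.

     gcc -O2 --param iv-consider-all-candidates-bound=40 \
	     --param iv-max-considered-uses=250 ...  */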
/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit,
		struct tree_niter_desc **desc_p)
{
  struct tree_niter_desc* desc = NULL;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must know it
	 unconditionally (i.e., without possibility of # of iterations
	 being zero).  Also, we cannot safely work with ssa names that
	 appear in phi nodes on abnormal edges, so that we do not create
	 overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (number_of_iterations_exit (data->current_loop,
				     exit, desc, true)
	  && integer_zerop (desc->may_be_zero)
	  && !contains_abnormal_ssa_name_p (desc->niter))
	niter = desc->niter;
      else
	niter = NULL_TREE;

      desc->niter = niter;
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    niter = ((struct tree_niter_desc *) *slot)->niter;

  if (desc_p)
    *desc_p = (struct tree_niter_desc *) *slot;
  return niter;
}
/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit, NULL);
}
/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *)ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *)ent2;

  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *)ent;
  return expr->hash;
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
				    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
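/* For example (a sketch): for EXPR == &a[i] this returns the address of
   the array "a"; for a POINTER_PLUS_EXPR such as p + 4 it recurses into
   "p"; and for a pointer that is an integer constant it returns
   NULL_TREE, meaning no base object is known.  */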
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = fold_convert (sizetype, step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
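/* As an illustration (a sketch): for a counted loop whose header
   contains

     i_1 = PHI <0(preheader), i_2(latch)>
     ...
     i_2 = i_1 + 1;

   find_bivs records i_1 as an iv with base 0 (the preheader argument of
   the phi) and step 1 (as determined by simple_iv).  */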
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter, TDF_SLIM);
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
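/* For example (a sketch): inside "for (i = 0; i < n; i++)" where n is
   defined before the loop, the expression n * 4 is obviously invariant,
   while i + 1 is not, since i is defined by a phi node in the loop
   header.  */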
/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
	return false;
    }

  return true;
}
/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetic already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
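/* For instance (a sketch): for the reference a[i] with "int a[]" and an
   index iv i with step 1, the element size is 4, so 4 * 1 is added to
   DTA->step and *IDX is replaced by the initial value of i.  */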
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
			      precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
	return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
			      precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
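/* For example (a sketch): with TOP = i * 8 + i * 4 and BOT = i, the
   MULT_EXPR and PLUS_EXPR cases combine to set *MUL to 12.  With
   TOP = 12 and BOT = 8 the floor division leaves a remainder, so the
   function returns false.  */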
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
	find_interesting_uses_op (data, def);
    }
}
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ???  Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
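/* For instance (a sketch): for EXPR == p + 16 this returns "p" and sets
   *OFFSET to 16; for EXPR == &a[4] with 4-byte elements it returns the
   address of "a" and sets *OFFSET to 16.  */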
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
2206 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT
2207 and the position to POS. If USE is not NULL, the candidate is set as related
2208 to it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2209 replacement of the final value of the iv by a direct computation. */
2211 static struct iv_cand *
2212 add_candidate_1 (struct ivopts_data *data,
2213 tree base, tree step, bool important, enum iv_position pos,
2214 struct iv_use *use, gimple incremented_at)
2216 unsigned i;
2217 struct iv_cand *cand = NULL;
2218 tree type, orig_type;
2220 if (base)
2222 orig_type = TREE_TYPE (base);
2223 type = generic_type_for (orig_type);
2224 if (type != orig_type)
2226 base = fold_convert (type, base);
2227 step = fold_convert (type, step);
2231 for (i = 0; i < n_iv_cands (data); i++)
2233 cand = iv_cand (data, i);
2235 if (cand->pos != pos)
2236 continue;
2238 if (cand->incremented_at != incremented_at
2239 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2240 && cand->ainc_use != use))
2241 continue;
2243 if (!cand->iv)
2245 if (!base && !step)
2246 break;
2248 continue;
2251 if (!base && !step)
2252 continue;
2254 if (operand_equal_p (base, cand->iv->base, 0)
2255 && operand_equal_p (step, cand->iv->step, 0)
2256 && (TYPE_PRECISION (TREE_TYPE (base))
2257 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2258 break;
2261 if (i == n_iv_cands (data))
2263 cand = XCNEW (struct iv_cand);
2264 cand->id = i;
2266 if (!base && !step)
2267 cand->iv = NULL;
2268 else
2269 cand->iv = alloc_iv (base, step);
2271 cand->pos = pos;
2272 if (pos != IP_ORIGINAL && cand->iv)
2274 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2275 cand->var_after = cand->var_before;
2277 cand->important = important;
2278 cand->incremented_at = incremented_at;
2279 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2281 if (step
2282 && TREE_CODE (step) != INTEGER_CST)
2284 fd_ivopts_data = data;
2285 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2288 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2289 cand->ainc_use = use;
2290 else
2291 cand->ainc_use = NULL;
2293 if (dump_file && (dump_flags & TDF_DETAILS))
2294 dump_cand (dump_file, cand);
2297 if (important && !cand->important)
2299 cand->important = true;
2300 if (dump_file && (dump_flags & TDF_DETAILS))
2301 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2304 if (use)
2306 bitmap_set_bit (use->related_cands, i);
2307 if (dump_file && (dump_flags & TDF_DETAILS))
2308 fprintf (dump_file, "Candidate %d is related to use %d\n",
2309 cand->id, use->id);
2312 return cand;
2315 /* Returns true if incrementing the induction variable at the end of the LOOP
2316 is allowed.
2318 The purpose is to avoid splitting the latch edge with a biv increment, thus
2319 creating a jump, possibly confusing other optimization passes and leaving
2320 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2321 is not available (so we do not have a better alternative), or if the latch
2322 edge is already nonempty. */
2324 static bool
2325 allow_ip_end_pos_p (struct loop *loop)
2327 if (!ip_normal_pos (loop))
2328 return true;
2330 if (!empty_block_p (ip_end_pos (loop)))
2331 return true;
2333 return false;
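/* For illustration: if the block at the end of the loop already
   contains statements, placing the increment there adds no new
   jump, so IP_END is allowed; if it is empty, using IP_END would
   mean splitting the latch edge, so IP_NORMAL is preferred when
   it is available. */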
2336 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2337 Important field is set to IMPORTANT. */
2339 static void
2340 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2341 bool important, struct iv_use *use)
2343 basic_block use_bb = gimple_bb (use->stmt);
2344 enum machine_mode mem_mode;
2345 unsigned HOST_WIDE_INT cstepi;
2347 /* If we insert the increment in any position other than the standard
2348 ones, we must ensure that the iv is incremented exactly once per iteration.
2349 It must not be in an inner nested loop, or on one side of an if
2350 statement. */
2351 if (use_bb->loop_father != data->current_loop
2352 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2353 || stmt_could_throw_p (use->stmt)
2354 || !cst_and_fits_in_hwi (step))
2355 return;
2357 cstepi = int_cst_value (step);
2359 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2360 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2361 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2363 enum tree_code code = MINUS_EXPR;
2364 tree new_base;
2365 tree new_step = step;
2367 if (POINTER_TYPE_P (TREE_TYPE (base)))
2369 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2370 code = POINTER_PLUS_EXPR;
2372 else
2373 new_step = fold_convert (TREE_TYPE (base), new_step);
2374 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2375 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2376 use->stmt);
2378 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2379 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2381 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2382 use->stmt);
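/* A sketch on a hypothetical target: for a use *p of SImode
   (4 bytes) with step 4, HAVE_POST_INCREMENT yields an
   IP_AFTER_USE candidate {base, 4}, the "*p++" shape. With
   step -4 and HAVE_PRE_DECREMENT, the base is first adjusted to
   base - step = base + 4, so the IP_BEFORE_USE candidate matches
   the "*--p" shape. */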
2386 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT.
2387 If USE is not NULL, the candidate is set as related to it. The candidate
2388 computation is scheduled on all available positions. */
2390 static void
2391 add_candidate (struct ivopts_data *data,
2392 tree base, tree step, bool important, struct iv_use *use)
2394 if (ip_normal_pos (data->current_loop))
2395 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2396 if (ip_end_pos (data->current_loop)
2397 && allow_ip_end_pos_p (data->current_loop))
2398 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2400 if (use != NULL && use->type == USE_ADDRESS)
2401 add_autoinc_candidates (data, base, step, important, use);
2404 /* Add a standard "0 + 1 * iteration" iv candidate for a
2405 type with SIZE bits. */
2407 static void
2408 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2409 unsigned int size)
2411 tree type = lang_hooks.types.type_for_size (size, true);
2412 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2413 true, NULL);
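/* E.g. with 32-bit int this adds the canonical counter
   {base = 0, step = 1} in unsigned int -- the plain
   "i = 0; ...; i++" shape that most uses can be rewritten
   against. */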
2416 /* Adds standard iv candidates. */
2418 static void
2419 add_standard_iv_candidates (struct ivopts_data *data)
2421 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2423 /* The same for a double-integer type if it is still fast enough. */
2424 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2425 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2429 /* Adds candidates based on the old induction variable IV. */
2431 static void
2432 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2434 gimple phi;
2435 tree def;
2436 struct iv_cand *cand;
2438 add_candidate (data, iv->base, iv->step, true, NULL);
2440 /* The same, but with initial value zero. */
2441 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2442 add_candidate (data, size_int (0), iv->step, true, NULL);
2443 else
2444 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2445 iv->step, true, NULL);
2447 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2448 if (gimple_code (phi) == GIMPLE_PHI)
2450 /* Additionally record the possibility of leaving the original iv
2451 untouched. */
2452 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2453 cand = add_candidate_1 (data,
2454 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2455 SSA_NAME_DEF_STMT (def));
2456 cand->var_before = iv->ssa_name;
2457 cand->var_after = def;
2461 /* Adds candidates based on the old induction variables. */
2463 static void
2464 add_old_ivs_candidates (struct ivopts_data *data)
2466 unsigned i;
2467 struct iv *iv;
2468 bitmap_iterator bi;
2470 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2472 iv = ver_info (data, i)->iv;
2473 if (iv && iv->biv_p && !integer_zerop (iv->step))
2474 add_old_iv_candidates (data, iv);
2478 /* Adds candidates based on the value of the induction variable IV and USE. */
2480 static void
2481 add_iv_value_candidates (struct ivopts_data *data,
2482 struct iv *iv, struct iv_use *use)
2484 unsigned HOST_WIDE_INT offset;
2485 tree base;
2486 tree basetype;
2488 add_candidate (data, iv->base, iv->step, false, use);
2490 /* The same, but with initial value zero. Make such a variable important,
2491 since it is generic enough that possibly many uses may be based
2492 on it. */
2493 basetype = TREE_TYPE (iv->base);
2494 if (POINTER_TYPE_P (basetype))
2495 basetype = sizetype;
2496 add_candidate (data, build_int_cst (basetype, 0),
2497 iv->step, true, use);
2499 /* Third, try removing the constant offset. Make sure to even
2500 add a candidate for &a[0] vs. (T *)&a. */
2501 base = strip_offset (iv->base, &offset);
2502 if (offset
2503 || base != iv->base)
2504 add_candidate (data, base, iv->step, false, use);
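/* For illustration, assuming 4-byte elements: a use with iv
   {base = &a[4], step = 4} contributes three candidates --
   {&a[4], 4} itself, the generic {0, 4} marked important, and
   the offset-stripped {&a[0], 4} with the 16-byte offset
   removed. */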
2507 /* Adds candidates based on the uses. */
2509 static void
2510 add_derived_ivs_candidates (struct ivopts_data *data)
2512 unsigned i;
2514 for (i = 0; i < n_iv_uses (data); i++)
2516 struct iv_use *use = iv_use (data, i);
2518 if (!use)
2519 continue;
2521 switch (use->type)
2523 case USE_NONLINEAR_EXPR:
2524 case USE_COMPARE:
2525 case USE_ADDRESS:
2526 /* Just add the ivs based on the value of the iv used here. */
2527 add_iv_value_candidates (data, use->iv, use);
2528 break;
2530 default:
2531 gcc_unreachable ();
2536 /* Record important candidates and add them to related_cands bitmaps
2537 if needed. */
2539 static void
2540 record_important_candidates (struct ivopts_data *data)
2542 unsigned i;
2543 struct iv_use *use;
2545 for (i = 0; i < n_iv_cands (data); i++)
2547 struct iv_cand *cand = iv_cand (data, i);
2549 if (cand->important)
2550 bitmap_set_bit (data->important_candidates, i);
2553 data->consider_all_candidates = (n_iv_cands (data)
2554 <= CONSIDER_ALL_CANDIDATES_BOUND);
2556 if (data->consider_all_candidates)
2558 /* We will not need "related_cands" bitmaps in this case,
2559 so release them to decrease peak memory consumption. */
2560 for (i = 0; i < n_iv_uses (data); i++)
2562 use = iv_use (data, i);
2563 BITMAP_FREE (use->related_cands);
2566 else
2568 /* Add important candidates to the related_cands bitmaps. */
2569 for (i = 0; i < n_iv_uses (data); i++)
2570 bitmap_ior_into (iv_use (data, i)->related_cands,
2571 data->important_candidates);
2575 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2576 If consider_all_candidates is true, we use a two-dimensional array; otherwise
2577 we allocate a simple list for every use. */
2579 static void
2580 alloc_use_cost_map (struct ivopts_data *data)
2582 unsigned i, size, s, j;
2584 for (i = 0; i < n_iv_uses (data); i++)
2586 struct iv_use *use = iv_use (data, i);
2587 bitmap_iterator bi;
2589 if (data->consider_all_candidates)
2590 size = n_iv_cands (data);
2591 else
2593 s = 0;
2594 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2596 s++;
2599 /* Round up to the nearest power of two, so that taking the modulo by it is cheap. */
2600 for (size = 1; size < s; size <<= 1)
2601 continue;
2604 use->n_map_members = size;
2605 use->cost_map = XCNEWVEC (struct cost_pair, size);
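/* Sizing sketch: a use related to five candidates gets a table of
   size 8, so the lookup code below can reduce a candidate id with
   "id & (8 - 1)" rather than an actual modulo operation. */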
2609 /* Returns a description of the computation cost of an expression whose runtime
2610 cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2612 static comp_cost
2613 new_cost (unsigned runtime, unsigned complexity)
2615 comp_cost cost;
2617 cost.cost = runtime;
2618 cost.complexity = complexity;
2620 return cost;
2623 /* Adds costs COST1 and COST2. */
2625 static comp_cost
2626 add_costs (comp_cost cost1, comp_cost cost2)
2628 cost1.cost += cost2.cost;
2629 cost1.complexity += cost2.complexity;
2631 return cost1;
2633 /* Subtracts cost COST2 from COST1. */
2635 static comp_cost
2636 sub_costs (comp_cost cost1, comp_cost cost2)
2638 cost1.cost -= cost2.cost;
2639 cost1.complexity -= cost2.complexity;
2641 return cost1;
2644 /* Returns a negative number if COST1 < COST2, a positive number if
2645 COST1 > COST2, and 0 if COST1 = COST2. */
2647 static int
2648 compare_costs (comp_cost cost1, comp_cost cost2)
2650 if (cost1.cost == cost2.cost)
2651 return cost1.complexity - cost2.complexity;
2653 return cost1.cost - cost2.cost;
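/* The comparison is lexicographic: e.g. {cost 4, complexity 2}
   compares less than {cost 5, complexity 0}, while complexity
   only breaks ties between equal runtime costs, as in
   {4, 1} < {4, 2}. */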
2656 /* Returns true if COST is infinite. */
2658 static bool
2659 infinite_cost_p (comp_cost cost)
2661 return cost.cost == INFTY;
2664 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2665 depends on invariants DEPENDS_ON and that the value used in expressing it
2666 is VALUE. */
2668 static void
2669 set_use_iv_cost (struct ivopts_data *data,
2670 struct iv_use *use, struct iv_cand *cand,
2671 comp_cost cost, bitmap depends_on, tree value,
2672 int inv_expr_id)
2674 unsigned i, s;
2676 if (infinite_cost_p (cost))
2678 BITMAP_FREE (depends_on);
2679 return;
2682 if (data->consider_all_candidates)
2684 use->cost_map[cand->id].cand = cand;
2685 use->cost_map[cand->id].cost = cost;
2686 use->cost_map[cand->id].depends_on = depends_on;
2687 use->cost_map[cand->id].value = value;
2688 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2689 return;
2692 /* n_map_members is a power of two, so this computes modulo. */
2693 s = cand->id & (use->n_map_members - 1);
2694 for (i = s; i < use->n_map_members; i++)
2695 if (!use->cost_map[i].cand)
2696 goto found;
2697 for (i = 0; i < s; i++)
2698 if (!use->cost_map[i].cand)
2699 goto found;
2701 gcc_unreachable ();
2703 found:
2704 use->cost_map[i].cand = cand;
2705 use->cost_map[i].cost = cost;
2706 use->cost_map[i].depends_on = depends_on;
2707 use->cost_map[i].value = value;
2708 use->cost_map[i].inv_expr_id = inv_expr_id;
2711 /* Gets cost of (USE, CANDIDATE) pair. */
2713 static struct cost_pair *
2714 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2715 struct iv_cand *cand)
2717 unsigned i, s;
2718 struct cost_pair *ret;
2720 if (!cand)
2721 return NULL;
2723 if (data->consider_all_candidates)
2725 ret = use->cost_map + cand->id;
2726 if (!ret->cand)
2727 return NULL;
2729 return ret;
2732 /* n_map_members is a power of two, so this computes modulo. */
2733 s = cand->id & (use->n_map_members - 1);
2734 for (i = s; i < use->n_map_members; i++)
2735 if (use->cost_map[i].cand == cand)
2736 return use->cost_map + i;
2738 for (i = 0; i < s; i++)
2739 if (use->cost_map[i].cand == cand)
2740 return use->cost_map + i;
2742 return NULL;
2745 /* Returns an estimate of the cost of computing SEQ. */
2747 static unsigned
2748 seq_cost (rtx seq, bool speed)
2750 unsigned cost = 0;
2751 rtx set;
2753 for (; seq; seq = NEXT_INSN (seq))
2755 set = single_set (seq);
2756 if (set)
2757 cost += rtx_cost (SET_SRC (set), SET, speed);
2758 else
2759 cost++;
2762 return cost;
2765 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2766 static rtx
2767 produce_memory_decl_rtl (tree obj, int *regno)
2769 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2770 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2771 rtx x;
2773 gcc_assert (obj);
2774 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2776 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2777 x = gen_rtx_SYMBOL_REF (address_mode, name);
2778 SET_SYMBOL_REF_DECL (x, obj);
2779 x = gen_rtx_MEM (DECL_MODE (obj), x);
2780 set_mem_addr_space (x, as);
2781 targetm.encode_section_info (obj, x, true);
2783 else
2785 x = gen_raw_REG (address_mode, (*regno)++);
2786 x = gen_rtx_MEM (DECL_MODE (obj), x);
2787 set_mem_addr_space (x, as);
2790 return x;
2793 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
2794 walk_tree. DATA contains the actual fake register number. */
2796 static tree
2797 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2799 tree obj = NULL_TREE;
2800 rtx x = NULL_RTX;
2801 int *regno = (int *) data;
2803 switch (TREE_CODE (*expr_p))
2805 case ADDR_EXPR:
2806 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2807 handled_component_p (*expr_p);
2808 expr_p = &TREE_OPERAND (*expr_p, 0))
2809 continue;
2810 obj = *expr_p;
2811 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2812 x = produce_memory_decl_rtl (obj, regno);
2813 break;
2815 case SSA_NAME:
2816 *ws = 0;
2817 obj = SSA_NAME_VAR (*expr_p);
2818 if (!DECL_RTL_SET_P (obj))
2819 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2820 break;
2822 case VAR_DECL:
2823 case PARM_DECL:
2824 case RESULT_DECL:
2825 *ws = 0;
2826 obj = *expr_p;
2828 if (DECL_RTL_SET_P (obj))
2829 break;
2831 if (DECL_MODE (obj) == BLKmode)
2832 x = produce_memory_decl_rtl (obj, regno);
2833 else
2834 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2836 break;
2838 default:
2839 break;
2842 if (x)
2844 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2845 SET_DECL_RTL (obj, x);
2848 return NULL_TREE;
2851 /* Determines cost of the computation of EXPR. */
2853 static unsigned
2854 computation_cost (tree expr, bool speed)
2856 rtx seq, rslt;
2857 tree type = TREE_TYPE (expr);
2858 unsigned cost;
2859 /* Avoid using hard regs in ways which may be unsupported. */
2860 int regno = LAST_VIRTUAL_REGISTER + 1;
2861 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2862 enum node_frequency real_frequency = node->frequency;
2864 node->frequency = NODE_FREQUENCY_NORMAL;
2865 crtl->maybe_hot_insn_p = speed;
2866 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2867 start_sequence ();
2868 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2869 seq = get_insns ();
2870 end_sequence ();
2871 default_rtl_profile ();
2872 node->frequency = real_frequency;
2874 cost = seq_cost (seq, speed);
2875 if (MEM_P (rslt))
2876 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2877 TYPE_ADDR_SPACE (type), speed);
2878 else if (!REG_P (rslt))
2879 cost += rtx_cost (rslt, SET, speed);
2881 return cost;
2884 /* Returns the variable containing the value of candidate CAND at statement STMT. */
2886 static tree
2887 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2889 if (stmt_after_increment (loop, cand, stmt))
2890 return cand->var_after;
2891 else
2892 return cand->var_before;
2895 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2896 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2898 int
2899 tree_int_cst_sign_bit (const_tree t)
2901 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2902 unsigned HOST_WIDE_INT w;
2904 if (bitno < HOST_BITS_PER_WIDE_INT)
2905 w = TREE_INT_CST_LOW (t);
2906 else
2908 w = TREE_INT_CST_HIGH (t);
2909 bitno -= HOST_BITS_PER_WIDE_INT;
2912 return (w >> bitno) & 1;
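/* E.g. for a constant of an 8-bit type, bitno is 7, so the
   function returns 1 for values with bit 7 set (0x80 and above
   when read unsigned) and 0 otherwise, independently of the
   width of the mode holding the constant. */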
2915 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2916 same precision that is at least as wide as the precision of TYPE, stores
2917 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2918 type of A and B. */
2920 static tree
2921 determine_common_wider_type (tree *a, tree *b)
2923 tree wider_type = NULL;
2924 tree suba, subb;
2925 tree atype = TREE_TYPE (*a);
2927 if (CONVERT_EXPR_P (*a))
2929 suba = TREE_OPERAND (*a, 0);
2930 wider_type = TREE_TYPE (suba);
2931 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2932 return atype;
2934 else
2935 return atype;
2937 if (CONVERT_EXPR_P (*b))
2939 subb = TREE_OPERAND (*b, 0);
2940 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2941 return atype;
2943 else
2944 return atype;
2946 *a = suba;
2947 *b = subb;
2948 return wider_type;
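/* E.g. if A is (unsigned int) wa and B is (unsigned int) wb with
   wa and wb of a 64-bit type, the caller may subtract the 64-bit
   operands directly and only narrow the result, which folds
   better; the function then strips both casts and returns the
   64-bit type. */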
2951 /* Determines the expression by which USE is expressed from induction variable
2952 CAND at statement AT in LOOP. The expression is stored in a decomposed
2953 form into AFF. Returns false if USE cannot be expressed using CAND. */
2955 static bool
2956 get_computation_aff (struct loop *loop,
2957 struct iv_use *use, struct iv_cand *cand, gimple at,
2958 struct affine_tree_combination *aff)
2960 tree ubase = use->iv->base;
2961 tree ustep = use->iv->step;
2962 tree cbase = cand->iv->base;
2963 tree cstep = cand->iv->step, cstep_common;
2964 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2965 tree common_type, var;
2966 tree uutype;
2967 aff_tree cbase_aff, var_aff;
2968 double_int rat;
2970 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2972 /* We do not have a precision to express the values of USE. */
2973 return false;
2976 var = var_at_stmt (loop, cand, at);
2977 uutype = unsigned_type_for (utype);
2979 /* If the conversion is not a no-op, perform it. */
2980 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2982 cstep = fold_convert (uutype, cstep);
2983 cbase = fold_convert (uutype, cbase);
2984 var = fold_convert (uutype, var);
2987 if (!constant_multiple_of (ustep, cstep, &rat))
2988 return false;
2990 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2991 type, we achieve better folding by computing their difference in this
2992 wider type, and cast the result to UUTYPE. We do not need to worry about
2993 overflows, as all the arithmetic will in the end be performed in UUTYPE
2994 anyway. */
2995 common_type = determine_common_wider_type (&ubase, &cbase);
2997 /* use = ubase - ratio * cbase + ratio * var. */
2998 tree_to_aff_combination (ubase, common_type, aff);
2999 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3000 tree_to_aff_combination (var, uutype, &var_aff);
3002 /* We need to shift the value if we are after the increment. */
3003 if (stmt_after_increment (loop, cand, at))
3005 aff_tree cstep_aff;
3007 if (common_type != uutype)
3008 cstep_common = fold_convert (common_type, cstep);
3009 else
3010 cstep_common = cstep;
3012 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3013 aff_combination_add (&cbase_aff, &cstep_aff);
3016 aff_combination_scale (&cbase_aff, double_int_neg (rat));
3017 aff_combination_add (aff, &cbase_aff);
3018 if (common_type != uutype)
3019 aff_combination_convert (aff, uutype);
3021 aff_combination_scale (&var_aff, rat);
3022 aff_combination_add (aff, &var_aff);
3024 return true;
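/* A worked instance of the formula above, for hypothetical ivs:
   with use {base = &a[0], step = 4} and candidate {base = 0,
   step = 1}, constant_multiple_of gives rat = 4 and the
   combination becomes

     use = &a[0] - 4 * 0 + 4 * var = &a[0] + 4 * var,

   i.e. the use is recovered by scaling the candidate by the
   ratio of the steps. */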
3027 /* Determines the expression by which USE is expressed from induction variable
3028 CAND at statement AT in LOOP. The computation is unshared. */
3030 static tree
3031 get_computation_at (struct loop *loop,
3032 struct iv_use *use, struct iv_cand *cand, gimple at)
3034 aff_tree aff;
3035 tree type = TREE_TYPE (use->iv->base);
3037 if (!get_computation_aff (loop, use, cand, at, &aff))
3038 return NULL_TREE;
3039 unshare_aff_combination (&aff);
3040 return fold_convert (type, aff_combination_to_tree (&aff));
3043 /* Determines the expression by which USE is expressed from induction variable
3044 CAND in LOOP. The computation is unshared. */
3046 static tree
3047 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3049 return get_computation_at (loop, use, cand, use->stmt);
3052 /* Adjust the cost COST for being in loop setup rather than loop body.
3053 If we're optimizing for space, the loop setup overhead is constant;
3054 if we're optimizing for speed, amortize it over the per-iteration cost. */
3055 static unsigned
3056 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3058 if (cost == INFTY)
3059 return cost;
3060 else if (optimize_loop_for_speed_p (data->current_loop))
3061 return cost / avg_loop_niter (data->current_loop);
3062 else
3063 return cost;
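/* E.g. a setup cost of 20 in a loop with avg_loop_niter == 10 is
   accounted as 2 when optimizing for speed, since the setup code
   runs once while the body runs ten times; for size, the full 20
   is kept, as every setup instruction is paid for. */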
3066 /* Returns cost of addition in MODE. */
3068 static unsigned
3069 add_cost (enum machine_mode mode, bool speed)
3071 static unsigned costs[NUM_MACHINE_MODES];
3072 rtx seq;
3073 unsigned cost;
3075 if (costs[mode])
3076 return costs[mode];
3078 start_sequence ();
3079 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3080 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3081 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3082 NULL_RTX);
3083 seq = get_insns ();
3084 end_sequence ();
3086 cost = seq_cost (seq, speed);
3087 if (!cost)
3088 cost = 1;
3090 costs[mode] = cost;
3092 if (dump_file && (dump_flags & TDF_DETAILS))
3093 fprintf (dump_file, "Addition in %s costs %d\n",
3094 GET_MODE_NAME (mode), cost);
3095 return cost;
3098 /* Entry in a hashtable of already known costs for multiplication. */
3099 struct mbc_entry
3101 HOST_WIDE_INT cst; /* The constant to multiply by. */
3102 enum machine_mode mode; /* In mode. */
3103 unsigned cost; /* The cost. */
3106 /* Computes the hash value for ENTRY. */
3108 static hashval_t
3109 mbc_entry_hash (const void *entry)
3111 const struct mbc_entry *e = (const struct mbc_entry *) entry;
3113 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3116 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3118 static int
3119 mbc_entry_eq (const void *entry1, const void *entry2)
3121 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3122 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3124 return (e1->mode == e2->mode
3125 && e1->cst == e2->cst);
3128 /* Returns cost of multiplication by constant CST in MODE. */
3130 unsigned
3131 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3133 static htab_t costs;
3134 struct mbc_entry **cached, act;
3135 rtx seq;
3136 unsigned cost;
3138 if (!costs)
3139 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3141 act.mode = mode;
3142 act.cst = cst;
3143 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3144 if (*cached)
3145 return (*cached)->cost;
3147 *cached = XNEW (struct mbc_entry);
3148 (*cached)->mode = mode;
3149 (*cached)->cst = cst;
3151 start_sequence ();
3152 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3153 gen_int_mode (cst, mode), NULL_RTX, 0);
3154 seq = get_insns ();
3155 end_sequence ();
3157 cost = seq_cost (seq, speed);
3159 if (dump_file && (dump_flags & TDF_DETAILS))
3160 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3161 (int) cst, GET_MODE_NAME (mode), cost);
3163 (*cached)->cost = cost;
3165 return cost;
3168 /* Returns true if multiplying by RATIO is allowed in an address. Tests the
3169 validity for a memory reference accessing memory of mode MODE in
3170 address space AS. */
3172 DEF_VEC_P (sbitmap);
3173 DEF_VEC_ALLOC_P (sbitmap, heap);
3175 bool
3176 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3177 addr_space_t as)
3179 #define MAX_RATIO 128
3180 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3181 static VEC (sbitmap, heap) *valid_mult_list;
3182 sbitmap valid_mult;
3184 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3185 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3187 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3188 if (!valid_mult)
3190 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3191 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3192 rtx addr;
3193 HOST_WIDE_INT i;
3195 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3196 sbitmap_zero (valid_mult);
3197 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3198 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3200 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3201 if (memory_address_addr_space_p (mode, addr, as))
3202 SET_BIT (valid_mult, i + MAX_RATIO);
3205 if (dump_file && (dump_flags & TDF_DETAILS))
3207 fprintf (dump_file, " allowed multipliers:");
3208 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3209 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3210 fprintf (dump_file, " %d", (int) i);
3211 fprintf (dump_file, "\n");
3212 fprintf (dump_file, "\n");
3215 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3218 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3219 return false;
3221 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
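/* On an x86-like target, for instance, the probing above marks
   the scales 1, 2, 4 and 8 as valid for SImode, so a ratio of 4
   is free in the addressing mode while a ratio of 3 must be paid
   for with an explicit multiplication. */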
3224 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3225 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3226 variable is omitted. Compute the cost for a memory reference that accesses
3227 a memory location of mode MEM_MODE in address space AS.
3229 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3230 size of MEM_MODE / RATIO) is available. To make this determination, we
3231 look at the size of the increment to be made, which is given in CSTEP.
3232 CSTEP may be zero if the step is unknown.
3233 STMT_AFTER_INC is true iff the statement we're looking at is after the
3234 increment of the original biv.
3236 TODO -- there must be some better way. This all is quite crude. */
3238 typedef struct
3240 HOST_WIDE_INT min_offset, max_offset;
3241 unsigned costs[2][2][2][2];
3242 } *address_cost_data;
3244 DEF_VEC_P (address_cost_data);
3245 DEF_VEC_ALLOC_P (address_cost_data, heap);
3247 static comp_cost
3248 get_address_cost (bool symbol_present, bool var_present,
3249 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3250 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3251 addr_space_t as, bool speed,
3252 bool stmt_after_inc, bool *may_autoinc)
3254 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3255 static VEC(address_cost_data, heap) *address_cost_data_list;
3256 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3257 address_cost_data data;
3258 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3259 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3260 unsigned cost, acost, complexity;
3261 bool offset_p, ratio_p, autoinc;
3262 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3263 unsigned HOST_WIDE_INT mask;
3264 unsigned bits;
3266 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3267 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3268 data_index + 1);
3270 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3271 if (!data)
3273 HOST_WIDE_INT i;
3274 HOST_WIDE_INT rat, off = 0;
3275 int old_cse_not_expected, width;
3276 unsigned sym_p, var_p, off_p, rat_p, add_c;
3277 rtx seq, addr, base;
3278 rtx reg0, reg1;
3280 data = (address_cost_data) xcalloc (1, sizeof (*data));
3282 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3284 width = GET_MODE_BITSIZE (address_mode) - 1;
3285 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3286 width = HOST_BITS_PER_WIDE_INT - 1;
3287 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3289 for (i = width; i >= 0; i--)
3291 off = -((HOST_WIDE_INT) 1 << i);
3292 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3293 if (memory_address_addr_space_p (mem_mode, addr, as))
3294 break;
3296 data->min_offset = (i == -1 ? 0 : off);
3298 for (i = width; i >= 0; i--)
3300 off = ((HOST_WIDE_INT) 1 << i) - 1;
3301 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3302 if (memory_address_addr_space_p (mem_mode, addr, as))
3303 break;
3305 if (i == -1)
3306 off = 0;
3307 data->max_offset = off;
3309 if (dump_file && (dump_flags & TDF_DETAILS))
3311 fprintf (dump_file, "get_address_cost:\n");
3312 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3313 GET_MODE_NAME (mem_mode),
3314 data->min_offset);
3315 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3316 GET_MODE_NAME (mem_mode),
3317 data->max_offset);
3320 rat = 1;
3321 for (i = 2; i <= MAX_RATIO; i++)
3322 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3324 rat = i;
3325 break;
3328 /* Compute the cost of various addressing modes. */
3329 acost = 0;
3330 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3331 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3333 if (HAVE_PRE_DECREMENT)
3335 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3336 has_predec[mem_mode]
3337 = memory_address_addr_space_p (mem_mode, addr, as);
3339 if (HAVE_POST_DECREMENT)
3341 addr = gen_rtx_POST_DEC (address_mode, reg0);
3342 has_postdec[mem_mode]
3343 = memory_address_addr_space_p (mem_mode, addr, as);
3345 if (HAVE_PRE_INCREMENT)
3347 addr = gen_rtx_PRE_INC (address_mode, reg0);
3348 has_preinc[mem_mode]
3349 = memory_address_addr_space_p (mem_mode, addr, as);
3351 if (HAVE_POST_INCREMENT)
3353 addr = gen_rtx_POST_INC (address_mode, reg0);
3354 has_postinc[mem_mode]
3355 = memory_address_addr_space_p (mem_mode, addr, as);
3357 for (i = 0; i < 16; i++)
3359 sym_p = i & 1;
3360 var_p = (i >> 1) & 1;
3361 off_p = (i >> 2) & 1;
3362 rat_p = (i >> 3) & 1;
3364 addr = reg0;
3365 if (rat_p)
3366 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3367 gen_int_mode (rat, address_mode));
3369 if (var_p)
3370 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3372 if (sym_p)
3374 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3375 /* ??? We can run into trouble with some backends by presenting
3376 it with symbols which haven't been properly passed through
3377 targetm.encode_section_info. By setting the local bit, we
3378 enhance the probability of things working. */
3379 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3381 if (off_p)
3382 base = gen_rtx_fmt_e (CONST, address_mode,
3383 gen_rtx_fmt_ee
3384 (PLUS, address_mode, base,
3385 gen_int_mode (off, address_mode)));
3387 else if (off_p)
3388 base = gen_int_mode (off, address_mode);
3389 else
3390 base = NULL_RTX;
3392 if (base)
3393 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3395 start_sequence ();
3396 /* To avoid splitting addressing modes, pretend that no cse will
3397 follow. */
3398 old_cse_not_expected = cse_not_expected;
3399 cse_not_expected = true;
3400 addr = memory_address_addr_space (mem_mode, addr, as);
3401 cse_not_expected = old_cse_not_expected;
3402 seq = get_insns ();
3403 end_sequence ();
3405 acost = seq_cost (seq, speed);
3406 acost += address_cost (addr, mem_mode, as, speed);
3408 if (!acost)
3409 acost = 1;
3410 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3413 /* On some targets, it is quite expensive to load a symbol into a register,
3414 which makes addresses that contain symbols look much more expensive.
3415 However, the symbol will have to be loaded in any case before the
3416 loop (and quite likely we have it in a register already), so it does not
3417 make much sense to penalize them too heavily. So make some final
3418 tweaks for the SYMBOL_PRESENT modes:
3420 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3421 var is cheaper, use this mode with small penalty.
3422 If VAR_PRESENT is true, try whether the mode with
3423 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3424 if this is the case, use it. */
3425 add_c = add_cost (address_mode, speed);
3426 for (i = 0; i < 8; i++)
3428 var_p = i & 1;
3429 off_p = (i >> 1) & 1;
3430 rat_p = (i >> 2) & 1;
3432 acost = data->costs[0][1][off_p][rat_p] + 1;
3433 if (var_p)
3434 acost += add_c;
3436 if (acost < data->costs[1][var_p][off_p][rat_p])
3437 data->costs[1][var_p][off_p][rat_p] = acost;
3440 if (dump_file && (dump_flags & TDF_DETAILS))
3442 fprintf (dump_file, "Address costs:\n");
3444 for (i = 0; i < 16; i++)
3446 sym_p = i & 1;
3447 var_p = (i >> 1) & 1;
3448 off_p = (i >> 2) & 1;
3449 rat_p = (i >> 3) & 1;
3451 fprintf (dump_file, " ");
3452 if (sym_p)
3453 fprintf (dump_file, "sym + ");
3454 if (var_p)
3455 fprintf (dump_file, "var + ");
3456 if (off_p)
3457 fprintf (dump_file, "cst + ");
3458 if (rat_p)
3459 fprintf (dump_file, "rat * ");
3461 acost = data->costs[sym_p][var_p][off_p][rat_p];
3462 fprintf (dump_file, "index costs %d\n", acost);
3464 if (has_predec[mem_mode] || has_postdec[mem_mode]
3465 || has_preinc[mem_mode] || has_postinc[mem_mode])
3466 fprintf (dump_file, " May include autoinc/dec\n");
3467 fprintf (dump_file, "\n");
3470 VEC_replace (address_cost_data, address_cost_data_list,
3471 data_index, data);
3474 bits = GET_MODE_BITSIZE (address_mode);
3475 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3476 offset &= mask;
3477 if ((offset >> (bits - 1) & 1))
3478 offset |= ~mask;
3479 s_offset = offset;
3481 autoinc = false;
3482 msize = GET_MODE_SIZE (mem_mode);
3483 autoinc_offset = offset;
3484 if (stmt_after_inc)
3485 autoinc_offset += ratio * cstep;
3486 if (symbol_present || var_present || ratio != 1)
3487 autoinc = false;
3488 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3489 && msize == cstep)
3490 || (has_postdec[mem_mode] && autoinc_offset == 0
3491 && msize == -cstep)
3492 || (has_preinc[mem_mode] && autoinc_offset == msize
3493 && msize == cstep)
3494 || (has_predec[mem_mode] && autoinc_offset == -msize
3495 && msize == -cstep))
3496 autoinc = true;
3498 cost = 0;
3499 offset_p = (s_offset != 0
3500 && data->min_offset <= s_offset
3501 && s_offset <= data->max_offset);
3502 ratio_p = (ratio != 1
3503 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3505 if (ratio != 1 && !ratio_p)
3506 cost += multiply_by_cost (ratio, address_mode, speed);
3508 if (s_offset && !offset_p && !symbol_present)
3509 cost += add_cost (address_mode, speed);
3511 if (may_autoinc)
3512 *may_autoinc = autoinc;
3513 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3514 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3515 return new_cost (cost + acost, complexity);
3518 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3519 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3520 calculating the operands of EXPR. Returns true if successful, and returns
3521 the cost in COST. */
3523 static bool
3524 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3525 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3527 comp_cost res;
3528 tree op1 = TREE_OPERAND (expr, 1);
3529 tree cst = TREE_OPERAND (mult, 1);
3530 tree multop = TREE_OPERAND (mult, 0);
3531 int m = exact_log2 (int_cst_value (cst));
3532 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3533 int sa_cost;
3535 if (!(m >= 0 && m < maxm))
3536 return false;
3538 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3539 ? shiftadd_cost[speed][mode][m]
3540 : (mult == op1
3541 ? shiftsub1_cost[speed][mode][m]
3542 : shiftsub0_cost[speed][mode][m]));
3543 res = new_cost (sa_cost, 0);
3544 res = add_costs (res, mult == op1 ? cost0 : cost1);
3546 STRIP_NOPS (multop);
3547 if (!is_gimple_val (multop))
3548 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3550 *cost = res;
3551 return true;
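/* Usage sketch: for expr = a + b * 4, MULT is the b * 4 operand,
   m = exact_log2 (4) = 2, and the result is the target's
   shift-add cost for a shift of 2 plus the cost of the remaining
   operand -- typically cheaper than a multiply plus an add. */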
3554 /* Estimates cost of forcing expression EXPR into a variable. */
3556 static comp_cost
3557 force_expr_to_var_cost (tree expr, bool speed)
3559 static bool costs_initialized = false;
3560 static unsigned integer_cost [2];
3561 static unsigned symbol_cost [2];
3562 static unsigned address_cost [2];
3563 tree op0, op1;
3564 comp_cost cost0, cost1, cost;
3565 enum machine_mode mode;
3567 if (!costs_initialized)
3569 tree type = build_pointer_type (integer_type_node);
3570 tree var, addr;
3571 rtx x;
3572 int i;
3574 var = create_tmp_var_raw (integer_type_node, "test_var");
3575 TREE_STATIC (var) = 1;
3576 x = produce_memory_decl_rtl (var, NULL);
3577 SET_DECL_RTL (var, x);
3579 addr = build1 (ADDR_EXPR, type, var);
3582 for (i = 0; i < 2; i++)
3584 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3585 2000), i);
3587 symbol_cost[i] = computation_cost (addr, i) + 1;
3589 address_cost[i]
3590 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3591 if (dump_file && (dump_flags & TDF_DETAILS))
3593 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3594 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3595 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3596 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3597 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3598 fprintf (dump_file, "\n");
3602 costs_initialized = true;
3605 STRIP_NOPS (expr);
3607 if (SSA_VAR_P (expr))
3608 return zero_cost;
3610 if (is_gimple_min_invariant (expr))
3612 if (TREE_CODE (expr) == INTEGER_CST)
3613 return new_cost (integer_cost [speed], 0);
3615 if (TREE_CODE (expr) == ADDR_EXPR)
3617 tree obj = TREE_OPERAND (expr, 0);
3619 if (TREE_CODE (obj) == VAR_DECL
3620 || TREE_CODE (obj) == PARM_DECL
3621 || TREE_CODE (obj) == RESULT_DECL)
3622 return new_cost (symbol_cost [speed], 0);
3625 return new_cost (address_cost [speed], 0);
3628 switch (TREE_CODE (expr))
3630 case POINTER_PLUS_EXPR:
3631 case PLUS_EXPR:
3632 case MINUS_EXPR:
3633 case MULT_EXPR:
3634 op0 = TREE_OPERAND (expr, 0);
3635 op1 = TREE_OPERAND (expr, 1);
3636 STRIP_NOPS (op0);
3637 STRIP_NOPS (op1);
3639 if (is_gimple_val (op0))
3640 cost0 = zero_cost;
3641 else
3642 cost0 = force_expr_to_var_cost (op0, speed);
3644 if (is_gimple_val (op1))
3645 cost1 = zero_cost;
3646 else
3647 cost1 = force_expr_to_var_cost (op1, speed);
3649 break;
3651 case NEGATE_EXPR:
3652 op0 = TREE_OPERAND (expr, 0);
3653 STRIP_NOPS (op0);
3654 op1 = NULL_TREE;
3656 if (is_gimple_val (op0))
3657 cost0 = zero_cost;
3658 else
3659 cost0 = force_expr_to_var_cost (op0, speed);
3661 cost1 = zero_cost;
3662 break;
3664 default:
3665 /* Just an arbitrary value, FIXME. */
3666 return new_cost (target_spill_cost[speed], 0);
3669 mode = TYPE_MODE (TREE_TYPE (expr));
3670 switch (TREE_CODE (expr))
3672 case POINTER_PLUS_EXPR:
3673 case PLUS_EXPR:
3674 case MINUS_EXPR:
3675 case NEGATE_EXPR:
3676 cost = new_cost (add_cost (mode, speed), 0);
3677 if (TREE_CODE (expr) != NEGATE_EXPR)
3679 tree mult = NULL_TREE;
3680 comp_cost sa_cost;
3681 if (TREE_CODE (op1) == MULT_EXPR)
3682 mult = op1;
3683 else if (TREE_CODE (op0) == MULT_EXPR)
3684 mult = op0;
3686 if (mult != NULL_TREE
3687 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3688 && get_shiftadd_cost (expr, mode, cost0, cost1, mult, speed,
3689 &sa_cost))
3690 return sa_cost;
3692 break;
3694 case MULT_EXPR:
3695 if (cst_and_fits_in_hwi (op0))
3696 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3697 else if (cst_and_fits_in_hwi (op1))
3698 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3699 else
3700 return new_cost (target_spill_cost [speed], 0);
3701 break;
3703 default:
3704 gcc_unreachable ();
3707 cost = add_costs (cost, cost0);
3708 cost = add_costs (cost, cost1);
3710 /* Bound the cost by target_spill_cost. The parts of complicated
3711 computations are often either loop invariant or at least can
3712 be shared between several iv uses, so letting this grow without
3713 limits would not give reasonable results. */
3714 if (cost.cost > (int) target_spill_cost [speed])
3715 cost.cost = target_spill_cost [speed];
3717 return cost;
3720 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3721 invariants the computation depends on. */
3723 static comp_cost
3724 force_var_cost (struct ivopts_data *data,
3725 tree expr, bitmap *depends_on)
3727 if (depends_on)
3729 fd_ivopts_data = data;
3730 walk_tree (&expr, find_depends, depends_on, NULL);
3733 return force_expr_to_var_cost (expr, data->speed);
3736 /* Estimates the cost of expressing address ADDR as var + symbol + offset. The
3737 value of the offset is added to OFFSET; SYMBOL_PRESENT and VAR_PRESENT are set
3738 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3739 invariants the computation depends on. */
3741 static comp_cost
3742 split_address_cost (struct ivopts_data *data,
3743 tree addr, bool *symbol_present, bool *var_present,
3744 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3746 tree core;
3747 HOST_WIDE_INT bitsize;
3748 HOST_WIDE_INT bitpos;
3749 tree toffset;
3750 enum machine_mode mode;
3751 int unsignedp, volatilep;
3753 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3754 &unsignedp, &volatilep, false);
3756 if (toffset != 0
3757 || bitpos % BITS_PER_UNIT != 0
3758 || TREE_CODE (core) != VAR_DECL)
3760 *symbol_present = false;
3761 *var_present = true;
3762 fd_ivopts_data = data;
3763 walk_tree (&addr, find_depends, depends_on, NULL);
3764 return new_cost (target_spill_cost[data->speed], 0);
3767 *offset += bitpos / BITS_PER_UNIT;
3768 if (TREE_STATIC (core)
3769 || DECL_EXTERNAL (core))
3771 *symbol_present = true;
3772 *var_present = false;
3773 return zero_cost;
3776 *symbol_present = false;
3777 *var_present = true;
3778 return zero_cost;
3781 /* Estimates the cost of expressing the difference of addresses E1 - E2 as
3782 var + symbol + offset. The value of the offset is added to OFFSET;
3783 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3784 part is missing. DEPENDS_ON is a set of the invariants the computation
3785 depends on. */
3787 static comp_cost
3788 ptr_difference_cost (struct ivopts_data *data,
3789 tree e1, tree e2, bool *symbol_present, bool *var_present,
3790 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3792 HOST_WIDE_INT diff = 0;
3793 aff_tree aff_e1, aff_e2;
3794 tree type;
3796 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3798 if (ptr_difference_const (e1, e2, &diff))
3800 *offset += diff;
3801 *symbol_present = false;
3802 *var_present = false;
3803 return zero_cost;
3806 if (integer_zerop (e2))
3807 return split_address_cost (data, TREE_OPERAND (e1, 0),
3808 symbol_present, var_present, offset, depends_on);
3810 *symbol_present = false;
3811 *var_present = true;
3813 type = signed_type_for (TREE_TYPE (e1));
3814 tree_to_aff_combination (e1, type, &aff_e1);
3815 tree_to_aff_combination (e2, type, &aff_e2);
3816 aff_combination_scale (&aff_e2, double_int_minus_one);
3817 aff_combination_add (&aff_e1, &aff_e2);
3819 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3822 /* Estimates the cost of expressing the difference E1 - E2 as
3823 var + symbol + offset. The value of the offset is added to OFFSET;
3824 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3825 part is missing. DEPENDS_ON is a set of the invariants the computation
3826 depends on. */
3828 static comp_cost
3829 difference_cost (struct ivopts_data *data,
3830 tree e1, tree e2, bool *symbol_present, bool *var_present,
3831 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3833 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3834 unsigned HOST_WIDE_INT off1, off2;
3835 aff_tree aff_e1, aff_e2;
3836 tree type;
3838 e1 = strip_offset (e1, &off1);
3839 e2 = strip_offset (e2, &off2);
3840 *offset += off1 - off2;
3842 STRIP_NOPS (e1);
3843 STRIP_NOPS (e2);
3845 if (TREE_CODE (e1) == ADDR_EXPR)
3846 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3847 offset, depends_on);
3848 *symbol_present = false;
3850 if (operand_equal_p (e1, e2, 0))
3852 *var_present = false;
3853 return zero_cost;
3856 *var_present = true;
3858 if (integer_zerop (e2))
3859 return force_var_cost (data, e1, depends_on);
3861 if (integer_zerop (e1))
3863 comp_cost cost = force_var_cost (data, e2, depends_on);
3864 cost.cost += multiply_by_cost (-1, mode, data->speed);
3865 return cost;
3868 type = signed_type_for (TREE_TYPE (e1));
3869 tree_to_aff_combination (e1, type, &aff_e1);
3870 tree_to_aff_combination (e2, type, &aff_e2);
3871 aff_combination_scale (&aff_e2, double_int_minus_one);
3872 aff_combination_add (&aff_e1, &aff_e2);
3874 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
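/* E.g. for e1 = &a[10] and e2 = &a[2] with 4-byte elements, the
   strip_offset calls reduce both sides to the same base &a and
   accumulate 40 - 8 = 32 into *OFFSET; the remaining difference
   is zero, so the result is a pure compile-time offset with zero
   runtime cost. */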
3877 /* Returns true if AFF1 and AFF2 are identical. */
3879 static bool
3880 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3882 unsigned i;
3884 if (aff1->n != aff2->n)
3885 return false;
3887 for (i = 0; i < aff1->n; i++)
3889 if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
3890 return false;
3892 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3893 return false;
3895 return true;
3898 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3900 static int
3901 get_expr_id (struct ivopts_data *data, tree expr)
3903 struct iv_inv_expr_ent ent;
3904 struct iv_inv_expr_ent **slot;
3906 ent.expr = expr;
3907 ent.hash = iterative_hash_expr (expr, 0);
3908 slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3909 &ent, INSERT);
3910 if (*slot)
3911 return (*slot)->id;
3913 *slot = XNEW (struct iv_inv_expr_ent);
3914 (*slot)->expr = expr;
3915 (*slot)->hash = ent.hash;
3916 (*slot)->id = data->inv_expr_id++;
3917 return (*slot)->id;
3920 /* Returns the pseudo expr id if the expression UBASE - RATIO * CBASE
3921 requires a new compiler-generated temporary. Returns -1 otherwise.
3922 ADDRESS_P is a flag indicating if the expression is for address
3923 computation. */
3925 static int
3926 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3927 tree cbase, HOST_WIDE_INT ratio,
3928 bool address_p)
3930 aff_tree ubase_aff, cbase_aff;
3931 tree expr, ub, cb;
3933 STRIP_NOPS (ubase);
3934 STRIP_NOPS (cbase);
3935 ub = ubase;
3936 cb = cbase;
3938 if ((TREE_CODE (ubase) == INTEGER_CST)
3939 && (TREE_CODE (cbase) == INTEGER_CST))
3940 return -1;
3942 /* Strips the constant part. */
3943 if (TREE_CODE (ubase) == PLUS_EXPR
3944 || TREE_CODE (ubase) == MINUS_EXPR
3945 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3947 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3948 ubase = TREE_OPERAND (ubase, 0);
3951 /* Strips the constant part. */
3952 if (TREE_CODE (cbase) == PLUS_EXPR
3953 || TREE_CODE (cbase) == MINUS_EXPR
3954 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3956 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3957 cbase = TREE_OPERAND (cbase, 0);
3960 if (address_p)
3962 if (((TREE_CODE (ubase) == SSA_NAME)
3963 || (TREE_CODE (ubase) == ADDR_EXPR
3964 && is_gimple_min_invariant (ubase)))
3965 && (TREE_CODE (cbase) == INTEGER_CST))
3966 return -1;
3968 if (((TREE_CODE (cbase) == SSA_NAME)
3969 || (TREE_CODE (cbase) == ADDR_EXPR
3970 && is_gimple_min_invariant (cbase)))
3971 && (TREE_CODE (ubase) == INTEGER_CST))
3972 return -1;
3975 if (ratio == 1)
3977 if (operand_equal_p (ubase, cbase, 0))
3978 return -1;
3980 if (TREE_CODE (ubase) == ADDR_EXPR
3981 && TREE_CODE (cbase) == ADDR_EXPR)
3983 tree usym, csym;
3985 usym = TREE_OPERAND (ubase, 0);
3986 csym = TREE_OPERAND (cbase, 0);
3987 if (TREE_CODE (usym) == ARRAY_REF)
3989 tree ind = TREE_OPERAND (usym, 1);
3990 if (TREE_CODE (ind) == INTEGER_CST
3991 && host_integerp (ind, 0)
3992 && TREE_INT_CST_LOW (ind) == 0)
3993 usym = TREE_OPERAND (usym, 0);
3995 if (TREE_CODE (csym) == ARRAY_REF)
3997 tree ind = TREE_OPERAND (csym, 1);
3998 if (TREE_CODE (ind) == INTEGER_CST
3999 && host_integerp (ind, 0)
4000 && TREE_INT_CST_LOW (ind) == 0)
4001 csym = TREE_OPERAND (csym, 0);
4003 if (operand_equal_p (usym, csym, 0))
4004 return -1;
4006 /* Now do a more complex comparison. */
4007 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4008 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4009 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4010 return -1;
4013 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4014 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4016 aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
4017 aff_combination_add (&ubase_aff, &cbase_aff);
4018 expr = aff_combination_to_tree (&ubase_aff);
4019 return get_expr_id (data, expr);
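/* For instance, with ratio == 1, ubase = x + 16 and cbase = x
   compare equal once the constant parts are stripped, so -1 is
   returned and no new temporary is needed; likewise &a[0] vs.
   (T *)&a is caught by the ARRAY_REF check above. Two unrelated
   invariant bases x and y, however, yield a fresh id for the
   combined expression x - ratio * y. */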
4024 /* Determines the cost of the computation by which USE is expressed
4025 from induction variable CAND. If ADDRESS_P is true, we just need
4026 to create an address from it, otherwise we want to get it into a
4027 register. A set of invariants we depend on is stored in
4028 DEPENDS_ON. AT is the statement at which the value is computed.
4029 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4030 addressing is likely. */
4032 static comp_cost
4033 get_computation_cost_at (struct ivopts_data *data,
4034 struct iv_use *use, struct iv_cand *cand,
4035 bool address_p, bitmap *depends_on, gimple at,
4036 bool *can_autoinc,
4037 int *inv_expr_id)
4039 tree ubase = use->iv->base, ustep = use->iv->step;
4040 tree cbase, cstep;
4041 tree utype = TREE_TYPE (ubase), ctype;
4042 unsigned HOST_WIDE_INT cstepi, offset = 0;
4043 HOST_WIDE_INT ratio, aratio;
4044 bool var_present, symbol_present, stmt_is_after_inc;
4045 comp_cost cost;
4046 double_int rat;
4047 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4049 *depends_on = NULL;
4051 /* Only consider real candidates. */
4052 if (!cand->iv)
4053 return infinite_cost;
4055 cbase = cand->iv->base;
4056 cstep = cand->iv->step;
4057 ctype = TREE_TYPE (cbase);
4059 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4061 /* We do not have a precision to express the values of use. */
4062 return infinite_cost;
4065 if (address_p)
4067 /* Do not try to express address of an object with computation based
4068 on address of a different object. This may cause problems in rtl
4069 level alias analysis (that does not expect this to be happening,
4070 as this is illegal in C), and would be unlikely to be useful
4071 anyway. */
4072 if (use->iv->base_object
4073 && cand->iv->base_object
4074 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4075 return infinite_cost;
4078 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4080 /* TODO -- add direct handling of this case. */
4081 goto fallback;
4084 /* CSTEPI is removed from the offset in case the statement is after the
4085 increment. If the step is not constant, we use zero instead.
4086 This is a bit imprecise (there is the extra addition), but
4087 redundancy elimination is likely to transform the code so that
4088 it uses the value of the variable before the increment anyway,
4089 so it is not that unrealistic. */
4090 if (cst_and_fits_in_hwi (cstep))
4091 cstepi = int_cst_value (cstep);
4092 else
4093 cstepi = 0;
4095 if (!constant_multiple_of (ustep, cstep, &rat))
4096 return infinite_cost;
4098 if (double_int_fits_in_shwi_p (rat))
4099 ratio = double_int_to_shwi (rat);
4100 else
4101 return infinite_cost;
4103 STRIP_NOPS (cbase);
4104 ctype = TREE_TYPE (cbase);
4106 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4108 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4109 or ratio == 1, it is better to handle this like
4111 ubase - ratio * cbase + ratio * var
4113 (this also holds in the case ratio == -1, TODO). */
4115 if (cst_and_fits_in_hwi (cbase))
4117 offset = - ratio * int_cst_value (cbase);
4118 cost = difference_cost (data,
4119 ubase, build_int_cst (utype, 0),
4120 &symbol_present, &var_present, &offset,
4121 depends_on);
4122 cost.cost /= avg_loop_niter (data->current_loop);
4124 else if (ratio == 1)
4126 tree real_cbase = cbase;
4128 /* Check to see if any adjustment is needed. */
4129 if (cstepi == 0 && stmt_is_after_inc)
4131 aff_tree real_cbase_aff;
4132 aff_tree cstep_aff;
4134 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4135 &real_cbase_aff);
4136 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4138 aff_combination_add (&real_cbase_aff, &cstep_aff);
4139 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4142 cost = difference_cost (data,
4143 ubase, real_cbase,
4144 &symbol_present, &var_present, &offset,
4145 depends_on);
4146 cost.cost /= avg_loop_niter (data->current_loop);
4148 else if (address_p
4149 && !POINTER_TYPE_P (ctype)
4150 && multiplier_allowed_in_address_p
4151 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4152 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4154 cbase
4155 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4156 cost = difference_cost (data,
4157 ubase, cbase,
4158 &symbol_present, &var_present, &offset,
4159 depends_on);
4160 cost.cost /= avg_loop_niter (data->current_loop);
4162 else
4164 cost = force_var_cost (data, cbase, depends_on);
4165 cost = add_costs (cost,
4166 difference_cost (data,
4167 ubase, build_int_cst (utype, 0),
4168 &symbol_present, &var_present,
4169 &offset, depends_on));
4170 cost.cost /= avg_loop_niter (data->current_loop);
4171 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
4174 if (inv_expr_id)
4176 *inv_expr_id =
4177 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4178 /* Clear depends_on. */
4179 if (*inv_expr_id != -1 && depends_on && *depends_on)
4180 bitmap_clear (*depends_on);
4183 /* If we are after the increment, the value of the candidate is higher by
4184 one iteration. */
4185 if (stmt_is_after_inc)
4186 offset -= ratio * cstepi;
4188 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4189 (symbol/var1/const parts may be omitted). If we are looking for an
4190 address, find the cost of addressing this. */
4191 if (address_p)
4192 return add_costs (cost,
4193 get_address_cost (symbol_present, var_present,
4194 offset, ratio, cstepi,
4195 TYPE_MODE (TREE_TYPE (utype)),
4196 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4197 speed, stmt_is_after_inc,
4198 can_autoinc));
4200 /* Otherwise estimate the costs for computing the expression. */
4201 if (!symbol_present && !var_present && !offset)
4203 if (ratio != 1)
4204 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
4205 return cost;
4208 /* Symbol + offset should be compile-time computable so consider that they
4209 are added once to the variable, if present. */
4210 if (var_present && (symbol_present || offset))
4211 cost.cost += adjust_setup_cost (data,
4212 add_cost (TYPE_MODE (ctype), speed));
4214 /* Having an offset does not affect the runtime cost if it is added to the
4215 symbol, but it increases complexity.  */
4216 if (offset)
4217 cost.complexity++;
4219 cost.cost += add_cost (TYPE_MODE (ctype), speed);
4221 aratio = ratio > 0 ? ratio : -ratio;
4222 if (aratio != 1)
4223 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
4224 return cost;
4226 fallback:
4227 if (can_autoinc)
4228 *can_autoinc = false;
4231 /* Just get the expression, expand it and measure the cost. */
4232 tree comp = get_computation_at (data->current_loop, use, cand, at);
4234 if (!comp)
4235 return infinite_cost;
4237 if (address_p)
4238 comp = build_simple_mem_ref (comp);
4240 return new_cost (computation_cost (comp, speed), 0);
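/* Illustrative standalone sketch (not part of this pass; all names here
   are hypothetical) of the algebra behind the rewrite used above:
   ubase + ratio * (var - cbase) == (ubase - ratio * cbase) + ratio * var,
   so the invariant part can be computed once and only the scaled iv
   remains to be added per iteration.  */

static long
use_value_sketch (long ubase, long cbase, long ratio, long var)
{
  long offset = ubase - ratio * cbase;	/* invariant part, computed once */
  return offset + ratio * var;		/* == ubase + ratio * (var - cbase) */
}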
4244 /* Determines the cost of the computation by which USE is expressed
4245 from induction variable CAND. If ADDRESS_P is true, we just need
4246 to create an address from it, otherwise we want to get it into
4247 a register. A set of invariants we depend on is stored in
4248 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4249 autoinc addressing is likely. */
4251 static comp_cost
4252 get_computation_cost (struct ivopts_data *data,
4253 struct iv_use *use, struct iv_cand *cand,
4254 bool address_p, bitmap *depends_on,
4255 bool *can_autoinc, int *inv_expr_id)
4257 return get_computation_cost_at (data,
4258 use, cand, address_p, depends_on, use->stmt,
4259 can_autoinc, inv_expr_id);
4262 /* Determines cost of basing replacement of USE on CAND in a generic
4263 expression. */
4265 static bool
4266 determine_use_iv_cost_generic (struct ivopts_data *data,
4267 struct iv_use *use, struct iv_cand *cand)
4269 bitmap depends_on;
4270 comp_cost cost;
4271 int inv_expr_id = -1;
4273 /* The simple case first -- if we need to express the value of the preserved
4274 original biv, the cost is 0. This also prevents us from counting the
4275 cost of increment twice -- once at this use and once in the cost of
4276 the candidate. */
4277 if (cand->pos == IP_ORIGINAL
4278 && cand->incremented_at == use->stmt)
4280 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE, -1);
4281 return true;
4284 cost = get_computation_cost (data, use, cand, false, &depends_on,
4285 NULL, &inv_expr_id);
4287 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4288 inv_expr_id);
4290 return !infinite_cost_p (cost);
4293 /* Determines cost of basing replacement of USE on CAND in an address. */
4295 static bool
4296 determine_use_iv_cost_address (struct ivopts_data *data,
4297 struct iv_use *use, struct iv_cand *cand)
4299 bitmap depends_on;
4300 bool can_autoinc;
4301 int inv_expr_id = -1;
4302 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4303 &can_autoinc, &inv_expr_id);
4305 if (cand->ainc_use == use)
4307 if (can_autoinc)
4308 cost.cost -= cand->cost_step;
4309 /* If we generated the candidate solely for exploiting autoincrement
4310 opportunities, and it turns out it can't be used, set the cost to
4311 infinity to make sure we ignore it. */
4312 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4313 cost = infinite_cost;
4315 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4316 inv_expr_id);
4318 return !infinite_cost_p (cost);
4321 /* Computes value of candidate CAND at position AT in iteration NITER, and
4322 stores it to VAL. */
4324 static void
4325 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4326 aff_tree *val)
4328 aff_tree step, delta, nit;
4329 struct iv *iv = cand->iv;
4330 tree type = TREE_TYPE (iv->base);
4331 tree steptype = type;
4332 if (POINTER_TYPE_P (type))
4333 steptype = sizetype;
4335 tree_to_aff_combination (iv->step, steptype, &step);
4336 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4337 aff_combination_convert (&nit, steptype);
4338 aff_combination_mult (&nit, &step, &delta);
4339 if (stmt_after_increment (loop, cand, at))
4340 aff_combination_add (&delta, &step);
4342 tree_to_aff_combination (iv->base, type, val);
4343 aff_combination_add (val, &delta);
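/* A minimal integer-only sketch (hypothetical helper, ignoring the
   affine-tree machinery above): the candidate's value at AT after NITER
   iterations is base + niter * step, plus one extra step when AT is
   after the increment.  */

static unsigned long
cand_value_at_sketch (unsigned long base, unsigned long step,
		      unsigned long niter, int after_increment)
{
  return base + (niter + (after_increment ? 1 : 0)) * step;
}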
4346 /* Returns period of induction variable iv. */
4348 static tree
4349 iv_period (struct iv *iv)
4351 tree step = iv->step, period, type;
4352 tree pow2div;
4354 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4356 type = unsigned_type_for (TREE_TYPE (step));
4357 /* Period of the iv is lcm (step, type_range)/step - 1,
4358 i.e., N*type_range/step - 1. Since the type range is a power
4359 of two, N == step >> num_of_ending_zeros_binary (step),
4360 so the final result is
4362 (type_range >> num_of_ending_zeros_binary (step)) - 1
4365 pow2div = num_ending_zeros (step);
4367 period = build_low_bits_mask (type,
4368 (TYPE_PRECISION (type)
4369 - tree_low_cst (pow2div, 1)));
4371 return period;
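/* Standalone sketch (hypothetical helper) of the formula above for a
   32-bit unsigned iv: with K trailing zero bits in STEP, the period is
   (type_range >> K) - 1, which is just a low-bits mask of width 32 - K.
   E.g. STEP == 4 gives K == 2 and period 0x3fffffff.  */

static unsigned long
iv_period_sketch (unsigned long step)
{
  unsigned k = 0;

  while (!(step & 1))		/* count trailing zeros; STEP must be nonzero */
    {
      step >>= 1;
      k++;
    }
  return 0xffffffffUL >> k;	/* 2^(32-k) - 1 */
}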
4374 /* Returns the comparison operator used when eliminating the iv USE. */
4376 static enum tree_code
4377 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4379 struct loop *loop = data->current_loop;
4380 basic_block ex_bb;
4381 edge exit;
4383 ex_bb = gimple_bb (use->stmt);
4384 exit = EDGE_SUCC (ex_bb, 0);
4385 if (flow_bb_inside_loop_p (loop, exit->dest))
4386 exit = EDGE_SUCC (ex_bb, 1);
4388 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4391 /* Check whether it is possible to express the condition in USE by comparison
4392 of candidate CAND. If so, store the value compared against in BOUND.  */
4394 static bool
4395 may_eliminate_iv (struct ivopts_data *data,
4396 struct iv_use *use, struct iv_cand *cand, tree *bound)
4398 basic_block ex_bb;
4399 edge exit;
4400 tree nit, period;
4401 struct loop *loop = data->current_loop;
4402 aff_tree bnd;
4403 struct tree_niter_desc *desc = NULL;
4405 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4406 return false;
4408 /* For now this works only for exits that dominate the loop latch.
4409 TODO: extend to other conditions inside loop body. */
4410 ex_bb = gimple_bb (use->stmt);
4411 if (use->stmt != last_stmt (ex_bb)
4412 || gimple_code (use->stmt) != GIMPLE_COND
4413 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4414 return false;
4416 exit = EDGE_SUCC (ex_bb, 0);
4417 if (flow_bb_inside_loop_p (loop, exit->dest))
4418 exit = EDGE_SUCC (ex_bb, 1);
4419 if (flow_bb_inside_loop_p (loop, exit->dest))
4420 return false;
4422 nit = niter_for_exit (data, exit, &desc);
4423 if (!nit)
4424 return false;
4426 /* Determine whether we can use the variable to test the exit condition.
4427 This is the case iff the period of the induction variable is greater
4428 than the number of iterations for which the exit condition is true. */
4429 period = iv_period (cand->iv);
4431 /* If the number of iterations is constant, compare against it directly. */
4432 if (TREE_CODE (nit) == INTEGER_CST)
4434 /* See cand_value_at. */
4435 if (stmt_after_increment (loop, cand, use->stmt))
4437 if (!tree_int_cst_lt (nit, period))
4438 return false;
4440 else
4442 if (tree_int_cst_lt (period, nit))
4443 return false;
4447 /* If not, and if this is the only possible exit of the loop, see whether
4448 we can get a conservative estimate on the number of iterations of the
4449 entire loop and compare against that instead. */
4450 else
4452 double_int period_value, max_niter;
4454 max_niter = desc->max;
4455 if (stmt_after_increment (loop, cand, use->stmt))
4456 max_niter = double_int_add (max_niter, double_int_one);
4457 period_value = tree_to_double_int (period);
4458 if (double_int_ucmp (max_niter, period_value) > 0)
4460 /* See if we can take advantage of inferred loop bound information. */
4461 if (loop_only_exit_p (loop, exit))
4463 if (!estimated_loop_iterations (loop, true, &max_niter))
4464 return false;
4465 /* The loop bound is already adjusted by adding 1. */
4466 if (double_int_ucmp (max_niter, period_value) > 0)
4467 return false;
4469 else
4470 return false;
4474 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4476 *bound = aff_combination_to_tree (&bnd);
4477 /* It is unlikely that computing the number of iterations using division
4478 would be more profitable than keeping the original induction variable. */
4479 if (expression_expensive_p (*bound))
4480 return false;
4481 return true;
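/* Worked sketch (hypothetical helper) of the period test above: an
   8-bit unsigned iv with step 2 has period 127, so it can replace an
   exit test that stays true for at most 127 iterations, but not one in
   a loop running 200 times -- the candidate would wrap around first.  */

static int
may_eliminate_sketch (unsigned long period, unsigned long niter,
		      int after_increment)
{
  /* After the increment the candidate is one step further, so the
     comparison must be strict (see cand_value_at).  */
  return after_increment ? niter < period : niter <= period;
}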
4484 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4485 be copied, if it is used in the loop body and DATA->body_includes_call. */
4487 static int
4488 parm_decl_cost (struct ivopts_data *data, tree bound)
4490 tree sbound = bound;
4491 STRIP_NOPS (sbound);
4493 if (TREE_CODE (sbound) == SSA_NAME
4494 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4495 && gimple_nop_p (SSA_NAME_DEF_STMT (sbound))
4496 && data->body_includes_call)
4497 return COSTS_N_INSNS (1);
4499 return 0;
4502 /* Determines cost of basing replacement of USE on CAND in a condition. */
4504 static bool
4505 determine_use_iv_cost_condition (struct ivopts_data *data,
4506 struct iv_use *use, struct iv_cand *cand)
4508 tree bound = NULL_TREE;
4509 struct iv *cmp_iv;
4510 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4511 comp_cost elim_cost, express_cost, cost, bound_cost;
4512 bool ok;
4513 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4514 tree *control_var, *bound_cst;
4516 /* Only consider real candidates. */
4517 if (!cand->iv)
4519 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, -1);
4520 return false;
4523 /* Try iv elimination. */
4524 if (may_eliminate_iv (data, use, cand, &bound))
4526 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4527 if (elim_cost.cost == 0)
4528 elim_cost.cost = parm_decl_cost (data, bound);
4529 else if (TREE_CODE (bound) == INTEGER_CST)
4530 elim_cost.cost = 0;
4531 /* If we replace a loop condition 'i < n' with 'p < base + n',
4532 depends_on_elim will have 'base' and 'n' set, which implies
4533 that both 'base' and 'n' will be live during the loop. More likely,
4534 'base + n' will be loop invariant, resulting in only one live value
4535 during the loop. So in that case we clear depends_on_elim and set
4536 elim_inv_expr_id instead. */
4537 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4539 elim_inv_expr_id = get_expr_id (data, bound);
4540 bitmap_clear (depends_on_elim);
4542 /* The bound is a loop invariant, so it will be computed
4543 only once.  */
4544 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4546 else
4547 elim_cost = infinite_cost;
4549 /* Try expressing the original giv. If it is compared with an invariant,
4550 note that we cannot get rid of it. */
4551 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4552 NULL, &cmp_iv);
4553 gcc_assert (ok);
4555 /* When the condition is a comparison of the candidate IV against
4556 zero, prefer this IV.
4558 TODO: The constant that we're subtracting from the cost should
4559 be target-dependent. This information should be added to the
4560 target costs for each backend. */
4561 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4562 && integer_zerop (*bound_cst)
4563 && (operand_equal_p (*control_var, cand->var_after, 0)
4564 || operand_equal_p (*control_var, cand->var_before, 0)))
4565 elim_cost.cost -= 1;
4567 express_cost = get_computation_cost (data, use, cand, false,
4568 &depends_on_express, NULL,
4569 &express_inv_expr_id);
4570 fd_ivopts_data = data;
4571 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4573 /* Count the cost of the original bound as well. */
4574 bound_cost = force_var_cost (data, *bound_cst, NULL);
4575 if (bound_cost.cost == 0)
4576 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4577 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4578 bound_cost.cost = 0;
4579 express_cost.cost += bound_cost.cost;
4581 /* Choose the better approach, preferring the eliminated IV. */
4582 if (compare_costs (elim_cost, express_cost) <= 0)
4584 cost = elim_cost;
4585 depends_on = depends_on_elim;
4586 depends_on_elim = NULL;
4587 inv_expr_id = elim_inv_expr_id;
4589 else
4591 cost = express_cost;
4592 depends_on = depends_on_express;
4593 depends_on_express = NULL;
4594 bound = NULL_TREE;
4595 inv_expr_id = express_inv_expr_id;
4598 set_use_iv_cost (data, use, cand, cost, depends_on, bound, inv_expr_id);
4600 if (depends_on_elim)
4601 BITMAP_FREE (depends_on_elim);
4602 if (depends_on_express)
4603 BITMAP_FREE (depends_on_express);
4605 return !infinite_cost_p (cost);
4608 /* Determines cost of basing replacement of USE on CAND. Returns false
4609 if USE cannot be based on CAND. */
4611 static bool
4612 determine_use_iv_cost (struct ivopts_data *data,
4613 struct iv_use *use, struct iv_cand *cand)
4615 switch (use->type)
4617 case USE_NONLINEAR_EXPR:
4618 return determine_use_iv_cost_generic (data, use, cand);
4620 case USE_ADDRESS:
4621 return determine_use_iv_cost_address (data, use, cand);
4623 case USE_COMPARE:
4624 return determine_use_iv_cost_condition (data, use, cand);
4626 default:
4627 gcc_unreachable ();
4631 /* Return true if get_computation_cost indicates that autoincrement is
4632 a possibility for the pair of USE and CAND, false otherwise. */
4634 static bool
4635 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4636 struct iv_cand *cand)
4638 bitmap depends_on;
4639 bool can_autoinc;
4640 comp_cost cost;
4642 if (use->type != USE_ADDRESS)
4643 return false;
4645 cost = get_computation_cost (data, use, cand, true, &depends_on,
4646 &can_autoinc, NULL);
4648 BITMAP_FREE (depends_on);
4650 return !infinite_cost_p (cost) && can_autoinc;
4653 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4654 use that allows autoincrement, and set their AINC_USE if possible. */
4656 static void
4657 set_autoinc_for_original_candidates (struct ivopts_data *data)
4659 unsigned i, j;
4661 for (i = 0; i < n_iv_cands (data); i++)
4663 struct iv_cand *cand = iv_cand (data, i);
4664 struct iv_use *closest = NULL;
4665 if (cand->pos != IP_ORIGINAL)
4666 continue;
4667 for (j = 0; j < n_iv_uses (data); j++)
4669 struct iv_use *use = iv_use (data, j);
4670 unsigned uid = gimple_uid (use->stmt);
4671 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4672 || uid > gimple_uid (cand->incremented_at))
4673 continue;
4674 if (closest == NULL || uid > gimple_uid (closest->stmt))
4675 closest = use;
4677 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4678 continue;
4679 cand->ainc_use = closest;
4683 /* Finds the candidates for the induction variables. */
4685 static void
4686 find_iv_candidates (struct ivopts_data *data)
4688 /* Add commonly used ivs. */
4689 add_standard_iv_candidates (data);
4691 /* Add old induction variables. */
4692 add_old_ivs_candidates (data);
4694 /* Add induction variables derived from uses. */
4695 add_derived_ivs_candidates (data);
4697 set_autoinc_for_original_candidates (data);
4699 /* Record the important candidates. */
4700 record_important_candidates (data);
4703 /* Determines costs of basing the use of the iv on an iv candidate. */
4705 static void
4706 determine_use_iv_costs (struct ivopts_data *data)
4708 unsigned i, j;
4709 struct iv_use *use;
4710 struct iv_cand *cand;
4711 bitmap to_clear = BITMAP_ALLOC (NULL);
4713 alloc_use_cost_map (data);
4715 for (i = 0; i < n_iv_uses (data); i++)
4717 use = iv_use (data, i);
4719 if (data->consider_all_candidates)
4721 for (j = 0; j < n_iv_cands (data); j++)
4723 cand = iv_cand (data, j);
4724 determine_use_iv_cost (data, use, cand);
4727 else
4729 bitmap_iterator bi;
4731 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4733 cand = iv_cand (data, j);
4734 if (!determine_use_iv_cost (data, use, cand))
4735 bitmap_set_bit (to_clear, j);
4738 /* Remove the candidates for which the cost is infinite from
4739 the list of related candidates. */
4740 bitmap_and_compl_into (use->related_cands, to_clear);
4741 bitmap_clear (to_clear);
4745 BITMAP_FREE (to_clear);
4747 if (dump_file && (dump_flags & TDF_DETAILS))
4749 fprintf (dump_file, "Use-candidate costs:\n");
4751 for (i = 0; i < n_iv_uses (data); i++)
4753 use = iv_use (data, i);
4755 fprintf (dump_file, "Use %d:\n", i);
4756 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4757 for (j = 0; j < use->n_map_members; j++)
4759 if (!use->cost_map[j].cand
4760 || infinite_cost_p (use->cost_map[j].cost))
4761 continue;
4763 fprintf (dump_file, " %d\t%d\t%d\t",
4764 use->cost_map[j].cand->id,
4765 use->cost_map[j].cost.cost,
4766 use->cost_map[j].cost.complexity);
4767 if (use->cost_map[j].depends_on)
4768 bitmap_print (dump_file,
4769 use->cost_map[j].depends_on, "","");
4770 if (use->cost_map[j].inv_expr_id != -1)
4771 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4772 fprintf (dump_file, "\n");
4775 fprintf (dump_file, "\n");
4777 fprintf (dump_file, "\n");
4781 /* Determines cost of the candidate CAND. */
4783 static void
4784 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4786 comp_cost cost_base;
4787 unsigned cost, cost_step;
4788 tree base;
4790 if (!cand->iv)
4792 cand->cost = 0;
4793 return;
4796 /* There are two costs associated with the candidate -- its increment
4797 and its initialization. The second is almost negligible for any loop
4798 that rolls enough, so we take it into account only a little. */
4800 base = cand->iv->base;
4801 cost_base = force_var_cost (data, base, NULL);
4802 /* It will be exceptional that the iv register happens to be initialized with
4803 the proper value at no cost. In general, there will at least be a regcopy
4804 or a const set. */
4805 if (cost_base.cost == 0)
4806 cost_base.cost = COSTS_N_INSNS (1);
4807 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4809 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4811 /* Prefer the original ivs unless we may gain something by replacing them.
4812 The reason is to make debugging simpler; this is thus not relevant for
4813 artificial ivs created by other optimization passes. */
4814 if (cand->pos != IP_ORIGINAL
4815 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4816 cost++;
4818 /* Prefer not to insert statements into latch unless there are some
4819 already (so that we do not create unnecessary jumps). */
4820 if (cand->pos == IP_END
4821 && empty_block_p (ip_end_pos (data->current_loop)))
4822 cost++;
4824 cand->cost = cost;
4825 cand->cost_step = cost_step;
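/* Hedged sketch of the cost model above (the amortization scheme is
   illustrative; the real adjust_setup_cost is more involved): the
   increment cost is paid on every iteration, while the setup of the
   initial value is amortized over the expected iteration count.  */

static unsigned
cand_cost_sketch (unsigned step_cost, unsigned base_cost,
		  unsigned avg_niter)
{
  return step_cost + base_cost / (avg_niter ? avg_niter : 1);
}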
4828 /* Determines costs of computation of the candidates. */
4830 static void
4831 determine_iv_costs (struct ivopts_data *data)
4833 unsigned i;
4835 if (dump_file && (dump_flags & TDF_DETAILS))
4837 fprintf (dump_file, "Candidate costs:\n");
4838 fprintf (dump_file, " cand\tcost\n");
4841 for (i = 0; i < n_iv_cands (data); i++)
4843 struct iv_cand *cand = iv_cand (data, i);
4845 determine_iv_cost (data, cand);
4847 if (dump_file && (dump_flags & TDF_DETAILS))
4848 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4851 if (dump_file && (dump_flags & TDF_DETAILS))
4852 fprintf (dump_file, "\n");
4855 /* Calculates cost for having SIZE induction variables. */
4857 static unsigned
4858 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4860 /* We add size to the cost, so that we prefer eliminating ivs
4861 if possible. */
4862 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
4863 data->body_includes_call);
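/* Hedged standalone sketch of the shape of this penalty (the real
   estimate_reg_pressure_cost is target-dependent; AVAIL and SPILL are
   hypothetical parameters): below the register budget the cost grows
   linearly with SIZE, past it each extra live value is charged a
   spill.  */

static unsigned
global_cost_sketch (unsigned size, unsigned regs_used,
		    unsigned avail, unsigned spill)
{
  unsigned pressure = size + regs_used;
  unsigned penalty = pressure > avail ? (pressure - avail) * spill : 0;

  return size + penalty;	/* SIZE is added to prefer fewer ivs */
}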
4866 /* For each size of the induction variable set determine the penalty. */
4868 static void
4869 determine_set_costs (struct ivopts_data *data)
4871 unsigned j, n;
4872 gimple phi;
4873 gimple_stmt_iterator psi;
4874 tree op;
4875 struct loop *loop = data->current_loop;
4876 bitmap_iterator bi;
4878 if (dump_file && (dump_flags & TDF_DETAILS))
4880 fprintf (dump_file, "Global costs:\n");
4881 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4882 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
4883 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4884 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4887 n = 0;
4888 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4890 phi = gsi_stmt (psi);
4891 op = PHI_RESULT (phi);
4893 if (!is_gimple_reg (op))
4894 continue;
4896 if (get_iv (data, op))
4897 continue;
4899 n++;
4902 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4904 struct version_info *info = ver_info (data, j);
4906 if (info->inv_id && info->has_nonlin_use)
4907 n++;
4910 data->regs_used = n;
4911 if (dump_file && (dump_flags & TDF_DETAILS))
4912 fprintf (dump_file, " regs_used %d\n", n);
4914 if (dump_file && (dump_flags & TDF_DETAILS))
4916 fprintf (dump_file, " cost for size:\n");
4917 fprintf (dump_file, " ivs\tcost\n");
4918 for (j = 0; j <= 2 * target_avail_regs; j++)
4919 fprintf (dump_file, " %d\t%d\n", j,
4920 ivopts_global_cost_for_size (data, j));
4921 fprintf (dump_file, "\n");
4925 /* Returns true if A is a cheaper cost pair than B. */
4927 static bool
4928 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4930 int cmp;
4932 if (!a)
4933 return false;
4935 if (!b)
4936 return true;
4938 cmp = compare_costs (a->cost, b->cost);
4939 if (cmp < 0)
4940 return true;
4942 if (cmp > 0)
4943 return false;
4945 /* In case the costs are the same, prefer the cheaper candidate. */
4946 if (a->cand->cost < b->cand->cost)
4947 return true;
4949 return false;
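/* Standalone sketch of the ordering above, with compare_costs modeled
   as a lexicographic (cost, complexity) comparison; only on a full tie
   does the candidate's own cost decide.  The struct is hypothetical.  */

struct pair_sketch
{
  int cost, complexity, cand_cost;
};

static int
cheaper_sketch (const struct pair_sketch *a, const struct pair_sketch *b)
{
  if (a->cost != b->cost)
    return a->cost < b->cost;
  if (a->complexity != b->complexity)
    return a->complexity < b->complexity;
  return a->cand_cost < b->cand_cost;
}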
4953 /* Returns the candidate by which USE is expressed in IVS. */
4955 static struct cost_pair *
4956 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4958 return ivs->cand_for_use[use->id];
4961 /* Computes the cost field of IVS structure. */
4963 static void
4964 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4966 comp_cost cost = ivs->cand_use_cost;
4968 cost.cost += ivs->cand_cost;
4970 cost.cost += ivopts_global_cost_for_size (data,
4971 ivs->n_regs + ivs->num_used_inv_expr);
4973 ivs->cost = cost;
4976 /* Remove invariants in set INVS from set IVS. */
4978 static void
4979 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4981 bitmap_iterator bi;
4982 unsigned iid;
4984 if (!invs)
4985 return;
4987 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4989 ivs->n_invariant_uses[iid]--;
4990 if (ivs->n_invariant_uses[iid] == 0)
4991 ivs->n_regs--;
4995 /* Set USE not to be expressed by any candidate in IVS. */
4997 static void
4998 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4999 struct iv_use *use)
5001 unsigned uid = use->id, cid;
5002 struct cost_pair *cp;
5004 cp = ivs->cand_for_use[uid];
5005 if (!cp)
5006 return;
5007 cid = cp->cand->id;
5009 ivs->bad_uses++;
5010 ivs->cand_for_use[uid] = NULL;
5011 ivs->n_cand_uses[cid]--;
5013 if (ivs->n_cand_uses[cid] == 0)
5015 bitmap_clear_bit (ivs->cands, cid);
5016 /* Do not count the pseudocandidates. */
5017 if (cp->cand->iv)
5018 ivs->n_regs--;
5019 ivs->n_cands--;
5020 ivs->cand_cost -= cp->cand->cost;
5022 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5025 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5027 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5029 if (cp->inv_expr_id != -1)
5031 ivs->used_inv_expr[cp->inv_expr_id]--;
5032 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5033 ivs->num_used_inv_expr--;
5035 iv_ca_recount_cost (data, ivs);
5038 /* Add invariants in set INVS to set IVS. */
5040 static void
5041 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5043 bitmap_iterator bi;
5044 unsigned iid;
5046 if (!invs)
5047 return;
5049 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5051 ivs->n_invariant_uses[iid]++;
5052 if (ivs->n_invariant_uses[iid] == 1)
5053 ivs->n_regs++;
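/* Standalone sketch of the reference-counting idiom shared by the two
   helpers above: an invariant is charged one register when its use
   count rises from 0 to 1 and released when it drops back to 0.  */

static void
inv_refcount_sketch (unsigned *n_uses, unsigned *n_regs, int add)
{
  if (add)
    {
      if (++*n_uses == 1)
	(*n_regs)++;
    }
  else if (--*n_uses == 0)
    (*n_regs)--;
}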
5057 /* Set cost pair for USE in set IVS to CP. */
5059 static void
5060 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5061 struct iv_use *use, struct cost_pair *cp)
5063 unsigned uid = use->id, cid;
5065 if (ivs->cand_for_use[uid] == cp)
5066 return;
5068 if (ivs->cand_for_use[uid])
5069 iv_ca_set_no_cp (data, ivs, use);
5071 if (cp)
5073 cid = cp->cand->id;
5075 ivs->bad_uses--;
5076 ivs->cand_for_use[uid] = cp;
5077 ivs->n_cand_uses[cid]++;
5078 if (ivs->n_cand_uses[cid] == 1)
5080 bitmap_set_bit (ivs->cands, cid);
5081 /* Do not count the pseudocandidates. */
5082 if (cp->cand->iv)
5083 ivs->n_regs++;
5084 ivs->n_cands++;
5085 ivs->cand_cost += cp->cand->cost;
5087 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5090 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5091 iv_ca_set_add_invariants (ivs, cp->depends_on);
5093 if (cp->inv_expr_id != -1)
5095 ivs->used_inv_expr[cp->inv_expr_id]++;
5096 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5097 ivs->num_used_inv_expr++;
5099 iv_ca_recount_cost (data, ivs);
5103 /* Extend set IVS by expressing USE by some of the candidates in it
5104 if possible. All important candidates will be considered
5105 if IMPORTANT_CANDIDATES is true. */
5107 static void
5108 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5109 struct iv_use *use, bool important_candidates)
5111 struct cost_pair *best_cp = NULL, *cp;
5112 bitmap_iterator bi;
5113 bitmap cands;
5114 unsigned i;
5116 gcc_assert (ivs->upto >= use->id);
5118 if (ivs->upto == use->id)
5120 ivs->upto++;
5121 ivs->bad_uses++;
5124 cands = (important_candidates ? data->important_candidates : ivs->cands);
5125 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5127 struct iv_cand *cand = iv_cand (data, i);
5129 cp = get_use_iv_cost (data, use, cand);
5131 if (cheaper_cost_pair (cp, best_cp))
5132 best_cp = cp;
5135 iv_ca_set_cp (data, ivs, use, best_cp);
5138 /* Get cost for assignment IVS. */
5140 static comp_cost
5141 iv_ca_cost (struct iv_ca *ivs)
5143 /* This was a conditional expression but it triggered a bug in
5144 Sun C 5.5. */
5145 if (ivs->bad_uses)
5146 return infinite_cost;
5147 else
5148 return ivs->cost;
5151 /* Returns true if all dependences of CP are among invariants in IVS. */
5153 static bool
5154 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5156 unsigned i;
5157 bitmap_iterator bi;
5159 if (!cp->depends_on)
5160 return true;
5162 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5164 if (ivs->n_invariant_uses[i] == 0)
5165 return false;
5168 return true;
5171 /* Creates a change record expressing USE by NEW_CP instead of OLD_CP and
5172 chains it before NEXT_CHANGE. */
5174 static struct iv_ca_delta *
5175 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5176 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5178 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5180 change->use = use;
5181 change->old_cp = old_cp;
5182 change->new_cp = new_cp;
5183 change->next_change = next_change;
5185 return change;
5188 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5189 are rewritten. */
5191 static struct iv_ca_delta *
5192 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5194 struct iv_ca_delta *last;
5196 if (!l2)
5197 return l1;
5199 if (!l1)
5200 return l2;
5202 for (last = l1; last->next_change; last = last->next_change)
5203 continue;
5204 last->next_change = l2;
5206 return l1;
5209 /* Reverse the list of changes DELTA, forming its inverse. */
5211 static struct iv_ca_delta *
5212 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5214 struct iv_ca_delta *act, *next, *prev = NULL;
5215 struct cost_pair *tmp;
5217 for (act = delta; act; act = next)
5219 next = act->next_change;
5220 act->next_change = prev;
5221 prev = act;
5223 tmp = act->old_cp;
5224 act->old_cp = act->new_cp;
5225 act->new_cp = tmp;
5228 return prev;
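/* Standalone sketch (hypothetical node type) of the reversal above:
   the list links are reversed in place and each old/new payload pair
   is swapped, so committing the reversed delta undoes the original.  */

struct delta_sketch
{
  struct delta_sketch *next;
  int old_v, new_v;
};

static struct delta_sketch *
reverse_delta_sketch (struct delta_sketch *d)
{
  struct delta_sketch *prev = NULL;

  while (d)
    {
      struct delta_sketch *next = d->next;
      int tmp = d->old_v;

      d->old_v = d->new_v;
      d->new_v = tmp;
      d->next = prev;
      prev = d;
      d = next;
    }
  return prev;
}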
5231 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5232 reverted instead. */
5234 static void
5235 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5236 struct iv_ca_delta *delta, bool forward)
5238 struct cost_pair *from, *to;
5239 struct iv_ca_delta *act;
5241 if (!forward)
5242 delta = iv_ca_delta_reverse (delta);
5244 for (act = delta; act; act = act->next_change)
5246 from = act->old_cp;
5247 to = act->new_cp;
5248 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5249 iv_ca_set_cp (data, ivs, act->use, to);
5252 if (!forward)
5253 iv_ca_delta_reverse (delta);
5256 /* Returns true if CAND is used in IVS. */
5258 static bool
5259 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5261 return ivs->n_cand_uses[cand->id] > 0;
5264 /* Returns number of induction variable candidates in the set IVS. */
5266 static unsigned
5267 iv_ca_n_cands (struct iv_ca *ivs)
5269 return ivs->n_cands;
5272 /* Free the list of changes DELTA. */
5274 static void
5275 iv_ca_delta_free (struct iv_ca_delta **delta)
5277 struct iv_ca_delta *act, *next;
5279 for (act = *delta; act; act = next)
5281 next = act->next_change;
5282 free (act);
5285 *delta = NULL;
5288 /* Allocates a new iv candidate assignment. */
5290 static struct iv_ca *
5291 iv_ca_new (struct ivopts_data *data)
5293 struct iv_ca *nw = XNEW (struct iv_ca);
5295 nw->upto = 0;
5296 nw->bad_uses = 0;
5297 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5298 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5299 nw->cands = BITMAP_ALLOC (NULL);
5300 nw->n_cands = 0;
5301 nw->n_regs = 0;
5302 nw->cand_use_cost = zero_cost;
5303 nw->cand_cost = 0;
5304 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5305 nw->cost = zero_cost;
5306 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5307 nw->num_used_inv_expr = 0;
5309 return nw;
5312 /* Free memory occupied by the set IVS. */
5314 static void
5315 iv_ca_free (struct iv_ca **ivs)
5317 free ((*ivs)->cand_for_use);
5318 free ((*ivs)->n_cand_uses);
5319 BITMAP_FREE ((*ivs)->cands);
5320 free ((*ivs)->n_invariant_uses);
5321 free ((*ivs)->used_inv_expr);
5322 free (*ivs);
5323 *ivs = NULL;
5326 /* Dumps IVS to FILE. */
5328 static void
5329 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5331 const char *pref = " invariants ";
5332 unsigned i;
5333 comp_cost cost = iv_ca_cost (ivs);
5335 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5336 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5337 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5338 bitmap_print (file, ivs->cands, " candidates: ","\n");
5340 for (i = 0; i < ivs->upto; i++)
5342 struct iv_use *use = iv_use (data, i);
5343 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5344 if (cp)
5345 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5346 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5347 else
5348 fprintf (file, " use:%d --> ??\n", use->id);
5351 for (i = 1; i <= data->max_inv_id; i++)
5352 if (ivs->n_invariant_uses[i])
5354 fprintf (file, "%s%d", pref, i);
5355 pref = ", ";
5357 fprintf (file, "\n\n");
5360 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5361 new set, and store differences in DELTA. Number of induction variables
5362 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5363 the function will try to find a solution with a minimal number of iv candidates. */
5365 static comp_cost
5366 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5367 struct iv_cand *cand, struct iv_ca_delta **delta,
5368 unsigned *n_ivs, bool min_ncand)
5370 unsigned i;
5371 comp_cost cost;
5372 struct iv_use *use;
5373 struct cost_pair *old_cp, *new_cp;
5375 *delta = NULL;
5376 for (i = 0; i < ivs->upto; i++)
5378 use = iv_use (data, i);
5379 old_cp = iv_ca_cand_for_use (ivs, use);
5381 if (old_cp
5382 && old_cp->cand == cand)
5383 continue;
5385 new_cp = get_use_iv_cost (data, use, cand);
5386 if (!new_cp)
5387 continue;
5389 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5390 continue;
5392 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5393 continue;
5395 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5398 iv_ca_delta_commit (data, ivs, *delta, true);
5399 cost = iv_ca_cost (ivs);
5400 if (n_ivs)
5401 *n_ivs = iv_ca_n_cands (ivs);
5402 iv_ca_delta_commit (data, ivs, *delta, false);
5404 return cost;
5407 /* Try narrowing set IVS by removing CAND. Return the cost of
5408 the new set and store the differences in DELTA. */
5410 static comp_cost
5411 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5412 struct iv_cand *cand, struct iv_ca_delta **delta)
5414 unsigned i, ci;
5415 struct iv_use *use;
5416 struct cost_pair *old_cp, *new_cp, *cp;
5417 bitmap_iterator bi;
5418 struct iv_cand *cnd;
5419 comp_cost cost;
5421 *delta = NULL;
5422 for (i = 0; i < n_iv_uses (data); i++)
5424 use = iv_use (data, i);
5426 old_cp = iv_ca_cand_for_use (ivs, use);
5427 if (old_cp->cand != cand)
5428 continue;
5430 new_cp = NULL;
5432 if (data->consider_all_candidates)
5434 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5436 if (ci == cand->id)
5437 continue;
5439 cnd = iv_cand (data, ci);
5441 cp = get_use_iv_cost (data, use, cnd);
5442 if (!cp)
5443 continue;
5445 if (!iv_ca_has_deps (ivs, cp))
5446 continue;
5448 if (!cheaper_cost_pair (cp, new_cp))
5449 continue;
5451 new_cp = cp;
5454 else
5456 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5458 if (ci == cand->id)
5459 continue;
5461 cnd = iv_cand (data, ci);
5463 cp = get_use_iv_cost (data, use, cnd);
5464 if (!cp)
5465 continue;
5466 if (!iv_ca_has_deps (ivs, cp))
5467 continue;
5469 if (!cheaper_cost_pair (cp, new_cp))
5470 continue;
5472 new_cp = cp;
5476 if (!new_cp)
5478 iv_ca_delta_free (delta);
5479 return infinite_cost;
5482 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5485 iv_ca_delta_commit (data, ivs, *delta, true);
5486 cost = iv_ca_cost (ivs);
5487 iv_ca_delta_commit (data, ivs, *delta, false);
5489 return cost;
5492 /* Try optimizing the set of candidates IVS by removing candidates different
5493 from EXCEPT_CAND from it. Return the cost of the new set, and store
5494 differences in DELTA. */
5496 static comp_cost
5497 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5498 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5500 bitmap_iterator bi;
5501 struct iv_ca_delta *act_delta, *best_delta;
5502 unsigned i;
5503 comp_cost best_cost, acost;
5504 struct iv_cand *cand;
5506 best_delta = NULL;
5507 best_cost = iv_ca_cost (ivs);
5509 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5511 cand = iv_cand (data, i);
5513 if (cand == except_cand)
5514 continue;
5516 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5518 if (compare_costs (acost, best_cost) < 0)
5520 best_cost = acost;
5521 iv_ca_delta_free (&best_delta);
5522 best_delta = act_delta;
5524 else
5525 iv_ca_delta_free (&act_delta);
5528 if (!best_delta)
5530 *delta = NULL;
5531 return best_cost;
5534 /* Recurse to possibly remove other unnecessary ivs. */
5535 iv_ca_delta_commit (data, ivs, best_delta, true);
5536 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5537 iv_ca_delta_commit (data, ivs, best_delta, false);
5538 *delta = iv_ca_delta_join (best_delta, *delta);
5539 return best_cost;
5542 /* Tries to extend the set IVS in the best possible way in order
5543 to express USE. If ORIGINALP is true, prefer candidates from
5544 the original set of IVs, otherwise favor important candidates not
5545 based on any memory object. */
5547 static bool
5548 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5549 struct iv_use *use, bool originalp)
5551 comp_cost best_cost, act_cost;
5552 unsigned i;
5553 bitmap_iterator bi;
5554 struct iv_cand *cand;
5555 struct iv_ca_delta *best_delta = NULL, *act_delta;
5556 struct cost_pair *cp;
5558 iv_ca_add_use (data, ivs, use, false);
5559 best_cost = iv_ca_cost (ivs);
5561 cp = iv_ca_cand_for_use (ivs, use);
5562 if (!cp)
5564 ivs->upto--;
5565 ivs->bad_uses--;
5566 iv_ca_add_use (data, ivs, use, true);
5567 best_cost = iv_ca_cost (ivs);
5568 cp = iv_ca_cand_for_use (ivs, use);
5570 if (cp)
5572 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5573 iv_ca_set_no_cp (data, ivs, use);
5576 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5577 first try important candidates not based on any memory object. Only if
5578 this fails, try the specific ones. Rationale -- in loops with many
5579 variables the best choice often is to use just one generic biv. If we
5580 added here many ivs specific to the uses, the optimization algorithm later
5581 would be likely to get stuck in a local minimum, thus causing us to create
5582 too many ivs. The approach from few ivs to more seems more likely to be
5583 successful -- starting from few ivs, replacing an expensive use by a
5584 specific iv should always be a win. */
5585 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5587 cand = iv_cand (data, i);
5589 if (originalp && cand->pos !=IP_ORIGINAL)
5590 continue;
5592 if (!originalp && cand->iv->base_object != NULL_TREE)
5593 continue;
5595 if (iv_ca_cand_used_p (ivs, cand))
5596 continue;
5598 cp = get_use_iv_cost (data, use, cand);
5599 if (!cp)
5600 continue;
5602 iv_ca_set_cp (data, ivs, use, cp);
5603 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5604 true);
5605 iv_ca_set_no_cp (data, ivs, use);
5606 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5608 if (compare_costs (act_cost, best_cost) < 0)
5610 best_cost = act_cost;
5612 iv_ca_delta_free (&best_delta);
5613 best_delta = act_delta;
5615 else
5616 iv_ca_delta_free (&act_delta);
5619 if (infinite_cost_p (best_cost))
5621 for (i = 0; i < use->n_map_members; i++)
5623 cp = use->cost_map + i;
5624 cand = cp->cand;
5625 if (!cand)
5626 continue;
5628 /* Already tried this. */
5629 if (cand->important)
5631 if (originalp && cand->pos == IP_ORIGINAL)
5632 continue;
5633 if (!originalp && cand->iv->base_object == NULL_TREE)
5634 continue;
5637 if (iv_ca_cand_used_p (ivs, cand))
5638 continue;
5640 act_delta = NULL;
5641 iv_ca_set_cp (data, ivs, use, cp);
5642 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5643 iv_ca_set_no_cp (data, ivs, use);
5644 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5645 cp, act_delta);
5647 if (compare_costs (act_cost, best_cost) < 0)
5649 best_cost = act_cost;
5651 if (best_delta)
5652 iv_ca_delta_free (&best_delta);
5653 best_delta = act_delta;
5655 else
5656 iv_ca_delta_free (&act_delta);
5660 iv_ca_delta_commit (data, ivs, best_delta, true);
5661 iv_ca_delta_free (&best_delta);
5663 return !infinite_cost_p (best_cost);
5666 /* Finds an initial assignment of candidates to uses. */
5668 static struct iv_ca *
5669 get_initial_solution (struct ivopts_data *data, bool originalp)
5671 struct iv_ca *ivs = iv_ca_new (data);
5672 unsigned i;
5674 for (i = 0; i < n_iv_uses (data); i++)
5675 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5677 iv_ca_free (&ivs);
5678 return NULL;
5681 return ivs;
5684 /* Tries to improve the set of induction variables IVS. */
5686 static bool
5687 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5689 unsigned i, n_ivs;
5690 comp_cost acost, best_cost = iv_ca_cost (ivs);
5691 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5692 struct iv_cand *cand;
5694 /* Try extending the set of induction variables by one. */
5695 for (i = 0; i < n_iv_cands (data); i++)
5697 cand = iv_cand (data, i);
5699 if (iv_ca_cand_used_p (ivs, cand))
5700 continue;
5702 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5703 if (!act_delta)
5704 continue;
5706 /* If we successfully added the candidate and the set is small enough,
5707 try optimizing it by removing other candidates. */
5708 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5710 iv_ca_delta_commit (data, ivs, act_delta, true);
5711 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5712 iv_ca_delta_commit (data, ivs, act_delta, false);
5713 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5716 if (compare_costs (acost, best_cost) < 0)
5718 best_cost = acost;
5719 iv_ca_delta_free (&best_delta);
5720 best_delta = act_delta;
5722 else
5723 iv_ca_delta_free (&act_delta);
5726 if (!best_delta)
5728 /* Try removing the candidates from the set instead. */
5729 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5731 /* Nothing more we can do. */
5732 if (!best_delta)
5733 return false;
5736 iv_ca_delta_commit (data, ivs, best_delta, true);
5737 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5738 iv_ca_delta_free (&best_delta);
5739 return true;
5742 /* Attempts to find the optimal set of induction variables. We use a simple
5743 greedy heuristic -- we try to replace at most one candidate in the selected
5744 solution and remove the unused ivs while this improves the cost. */
5746 static struct iv_ca *
5747 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5749 struct iv_ca *set;
5751 /* Get the initial solution. */
5752 set = get_initial_solution (data, originalp);
5753 if (!set)
5755 if (dump_file && (dump_flags & TDF_DETAILS))
5756 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5757 return NULL;
5760 if (dump_file && (dump_flags & TDF_DETAILS))
5762 fprintf (dump_file, "Initial set of candidates:\n");
5763 iv_ca_dump (data, dump_file, set);
5766 while (try_improve_iv_set (data, set))
5768 if (dump_file && (dump_flags & TDF_DETAILS))
5770 fprintf (dump_file, "Improved to:\n");
5771 iv_ca_dump (data, dump_file, set);
5775 return set;
5778 static struct iv_ca *
5779 find_optimal_iv_set (struct ivopts_data *data)
5781 unsigned i;
5782 struct iv_ca *set, *origset;
5783 struct iv_use *use;
5784 comp_cost cost, origcost;
5786 /* Determine the cost based on a strategy that starts with the original IVs,
5787 and then try again using a strategy that prefers candidates not based
5788 on any IVs. */
5789 origset = find_optimal_iv_set_1 (data, true);
5790 set = find_optimal_iv_set_1 (data, false);
5792 if (!origset && !set)
5793 return NULL;
5795 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5796 cost = set ? iv_ca_cost (set) : infinite_cost;
5798 if (dump_file && (dump_flags & TDF_DETAILS))
5800 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5801 origcost.cost, origcost.complexity);
5802 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5803 cost.cost, cost.complexity);
5806 /* Choose the one with the best cost. */
5807 if (compare_costs (origcost, cost) <= 0)
5809 if (set)
5810 iv_ca_free (&set);
5811 set = origset;
5813 else if (origset)
5814 iv_ca_free (&origset);
5816 for (i = 0; i < n_iv_uses (data); i++)
5818 use = iv_use (data, i);
5819 use->selected = iv_ca_cand_for_use (set, use)->cand;
5822 return set;
5825 /* Creates a new induction variable corresponding to CAND. */
5827 static void
5828 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5830 gimple_stmt_iterator incr_pos;
5831 tree base;
5832 bool after = false;
5834 if (!cand->iv)
5835 return;
5837 switch (cand->pos)
5839 case IP_NORMAL:
5840 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5841 break;
5843 case IP_END:
5844 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5845 after = true;
5846 break;
5848 case IP_AFTER_USE:
5849 after = true;
5850 /* fall through */
5851 case IP_BEFORE_USE:
5852 incr_pos = gsi_for_stmt (cand->incremented_at);
5853 break;
5855 case IP_ORIGINAL:
5856 /* Mark that the iv is preserved. */
5857 name_info (data, cand->var_before)->preserve_biv = true;
5858 name_info (data, cand->var_after)->preserve_biv = true;
5860 /* Rewrite the increment so that it uses var_before directly. */
5861 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5862 return;
5865 gimple_add_tmp_var (cand->var_before);
5866 add_referenced_var (cand->var_before);
5868 base = unshare_expr (cand->iv->base);
5870 create_iv (base, unshare_expr (cand->iv->step),
5871 cand->var_before, data->current_loop,
5872 &incr_pos, after, &cand->var_before, &cand->var_after);
5875 /* Creates new induction variables described in SET. */
5877 static void
5878 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5880 unsigned i;
5881 struct iv_cand *cand;
5882 bitmap_iterator bi;
5884 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5886 cand = iv_cand (data, i);
5887 create_new_iv (data, cand);
5890 if (dump_file && (dump_flags & TDF_DETAILS))
5892 fprintf (dump_file, "\nSelected IV set: \n");
5893 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5895 cand = iv_cand (data, i);
5896 dump_cand (dump_file, cand);
5898 fprintf (dump_file, "\n");
5902 /* Rewrites USE (definition of iv used in a nonlinear expression)
5903 using candidate CAND. */
5905 static void
5906 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5907 struct iv_use *use, struct iv_cand *cand)
5909 tree comp;
5910 tree op, tgt;
5911 gimple ass;
5912 gimple_stmt_iterator bsi;
5914 /* An important special case -- if we are asked to express the value of
5915 the original iv by itself, just exit; there is no need to
5916 introduce a new computation (that might also need casting the
5917 variable to unsigned and back). */
5918 if (cand->pos == IP_ORIGINAL
5919 && cand->incremented_at == use->stmt)
5921 tree step, ctype, utype;
5922 enum tree_code incr_code = PLUS_EXPR, old_code;
5924 gcc_assert (is_gimple_assign (use->stmt));
5925 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5927 step = cand->iv->step;
5928 ctype = TREE_TYPE (step);
5929 utype = TREE_TYPE (cand->var_after);
5930 if (TREE_CODE (step) == NEGATE_EXPR)
5932 incr_code = MINUS_EXPR;
5933 step = TREE_OPERAND (step, 0);
5936 /* Check whether we may leave the computation unchanged.
5937 This is the case only if it does not rely on other
5938 computations in the loop -- otherwise, the computation
5939 we rely upon may be removed in remove_unused_ivs,
5940 thus leading to an ICE. */
5941 old_code = gimple_assign_rhs_code (use->stmt);
5942 if (old_code == PLUS_EXPR
5943 || old_code == MINUS_EXPR
5944 || old_code == POINTER_PLUS_EXPR)
5946 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5947 op = gimple_assign_rhs2 (use->stmt);
5948 else if (old_code != MINUS_EXPR
5949 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5950 op = gimple_assign_rhs1 (use->stmt);
5951 else
5952 op = NULL_TREE;
5954 else
5955 op = NULL_TREE;
5957 if (op
5958 && (TREE_CODE (op) == INTEGER_CST
5959 || operand_equal_p (op, step, 0)))
5960 return;
5962 /* Otherwise, add the necessary computations to express
5963 the iv. */
5964 op = fold_convert (ctype, cand->var_before);
5965 comp = fold_convert (utype,
5966 build2 (incr_code, ctype, op,
5967 unshare_expr (step)));
5969 else
5971 comp = get_computation (data->current_loop, use, cand);
5972 gcc_assert (comp != NULL_TREE);
5975 switch (gimple_code (use->stmt))
5977 case GIMPLE_PHI:
5978 tgt = PHI_RESULT (use->stmt);
5980 /* If we should keep the biv, do not replace it. */
5981 if (name_info (data, tgt)->preserve_biv)
5982 return;
5984 bsi = gsi_after_labels (gimple_bb (use->stmt));
5985 break;
5987 case GIMPLE_ASSIGN:
5988 tgt = gimple_assign_lhs (use->stmt);
5989 bsi = gsi_for_stmt (use->stmt);
5990 break;
5992 default:
5993 gcc_unreachable ();
5996 if (!valid_gimple_rhs_p (comp)
5997 || (gimple_code (use->stmt) != GIMPLE_PHI
5998 /* We can't allow re-allocating the stmt as it might still be
5999 pointed to. */
6000 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6001 >= gimple_num_ops (gsi_stmt (bsi)))))
6003 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6004 true, GSI_SAME_STMT);
6005 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6007 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6008 /* As this isn't a plain copy we have to reset alignment
6009 information. */
6010 if (SSA_NAME_PTR_INFO (comp))
6012 SSA_NAME_PTR_INFO (comp)->align = 1;
6013 SSA_NAME_PTR_INFO (comp)->misalign = 0;
6018 if (gimple_code (use->stmt) == GIMPLE_PHI)
6020 ass = gimple_build_assign (tgt, comp);
6021 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6023 bsi = gsi_for_stmt (use->stmt);
6024 remove_phi_node (&bsi, false);
6026 else
6028 gimple_assign_set_rhs_from_tree (&bsi, comp);
6029 use->stmt = gsi_stmt (bsi);
6033 /* Copies the reference information from OLD_REF to NEW_REF. */
6035 static void
6036 copy_ref_info (tree new_ref, tree old_ref)
6038 tree new_ptr_base = NULL_TREE;
6040 TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
6041 TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
6043 new_ptr_base = TREE_OPERAND (new_ref, 0);
6045 /* We can transfer points-to information from an old pointer
6046 or decl base to the new one. */
6047 if (new_ptr_base
6048 && TREE_CODE (new_ptr_base) == SSA_NAME
6049 && !SSA_NAME_PTR_INFO (new_ptr_base))
6051 tree base = get_base_address (old_ref);
6052 if (!base)
6054 else if ((TREE_CODE (base) == MEM_REF
6055 || TREE_CODE (base) == TARGET_MEM_REF)
6056 && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
6057 && SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)))
6059 struct ptr_info_def *new_pi;
6060 duplicate_ssa_name_ptr_info
6061 (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
6062 new_pi = SSA_NAME_PTR_INFO (new_ptr_base);
6063 /* We have to be careful about transferring alignment information. */
6064 if (TREE_CODE (old_ref) == MEM_REF
6065 && !(TREE_CODE (new_ref) == TARGET_MEM_REF
6066 && (TMR_INDEX2 (new_ref)
6067 || (TMR_STEP (new_ref)
6068 && (TREE_INT_CST_LOW (TMR_STEP (new_ref))
6069 < new_pi->align)))))
6071 new_pi->misalign += double_int_sub (mem_ref_offset (old_ref),
6072 mem_ref_offset (new_ref)).low;
6073 new_pi->misalign &= (new_pi->align - 1);
6075 else
6077 new_pi->align = 1;
6078 new_pi->misalign = 0;
6081 else if (TREE_CODE (base) == VAR_DECL
6082 || TREE_CODE (base) == PARM_DECL
6083 || TREE_CODE (base) == RESULT_DECL)
6085 struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
6086 pt_solution_set_var (&pi->pt, base);
6091 /* Performs a peephole optimization to reorder the iv update statement with
6092 a mem ref to enable instruction combining in later phases. The mem ref uses
6093 the iv value before the update, so the reordering transformation requires
6094 adjustment of the offset. CAND is the selected IV_CAND.
6096 Example:
6098 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6099 iv2 = iv1 + 1;
6101 if (t < val) (1)
6102 goto L;
6103 goto Head;
6106 directly propagating t over to (1) will introduce an overlapping live range,
6107 thus increasing register pressure. This peephole transforms it into:
6110 iv2 = iv1 + 1;
6111 t = MEM_REF (base, iv2, 8, 8);
6112 if (t < val)
6113 goto L;
6114 goto Head;
6117 static void
6118 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6120 tree var_after;
6121 gimple iv_update, stmt;
6122 basic_block bb;
6123 gimple_stmt_iterator gsi, gsi_iv;
6125 if (cand->pos != IP_NORMAL)
6126 return;
6128 var_after = cand->var_after;
6129 iv_update = SSA_NAME_DEF_STMT (var_after);
6131 bb = gimple_bb (iv_update);
6132 gsi = gsi_last_nondebug_bb (bb);
6133 stmt = gsi_stmt (gsi);
6135 /* Only handle conditional statements for now. */
6136 if (gimple_code (stmt) != GIMPLE_COND)
6137 return;
6139 gsi_prev_nondebug (&gsi);
6140 stmt = gsi_stmt (gsi);
6141 if (stmt != iv_update)
6142 return;
6144 gsi_prev_nondebug (&gsi);
6145 if (gsi_end_p (gsi))
6146 return;
6148 stmt = gsi_stmt (gsi);
6149 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6150 return;
6152 if (stmt != use->stmt)
6153 return;
6155 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6156 return;
6158 if (dump_file && (dump_flags & TDF_DETAILS))
6160 fprintf (dump_file, "Reordering \n");
6161 print_gimple_stmt (dump_file, iv_update, 0, 0);
6162 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6163 fprintf (dump_file, "\n");
6166 gsi = gsi_for_stmt (use->stmt);
6167 gsi_iv = gsi_for_stmt (iv_update);
6168 gsi_move_before (&gsi_iv, &gsi);
6170 cand->pos = IP_BEFORE_USE;
6171 cand->incremented_at = use->stmt;
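/* Worked sketch (hypothetical addressing) of the offset adjustment in
   the example above: reading through iv1 at byte offset 16 with stride
   8 is the same address as reading through iv2 = iv1 + 1 at offset
   16 - 8 == 8.  */

static long
reordered_addr_sketch (long base, long iv1, long stride, long offset)
{
  long iv2 = iv1 + 1;

  return base + iv2 * stride + (offset - stride);
  /* == base + iv1 * stride + offset */
}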
6174 /* Rewrites USE (address that is an iv) using candidate CAND. */
6176 static void
6177 rewrite_use_address (struct ivopts_data *data,
6178 struct iv_use *use, struct iv_cand *cand)
6180 aff_tree aff;
6181 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6182 tree base_hint = NULL_TREE;
6183 tree ref, iv;
6184 bool ok;
6186 adjust_iv_update_pos (cand, use);
6187 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6188 gcc_assert (ok);
6189 unshare_aff_combination (&aff);
6191 /* To avoid undefined overflow problems, all IV candidates use unsigned
6192 integer types. The drawback is that this makes it impossible for
6193 create_mem_ref to distinguish an IV that is based on a memory object
6194 from one that represents simply an offset.
6196 To work around this problem, we pass a hint to create_mem_ref that
6197 indicates which variable (if any) in aff is an IV based on a memory
6198 object. Note that we only consider the candidate. If this is not
6199 based on an object, the base of the reference is in some subexpression
6200 of the use -- but these will use pointer types, so they are recognized
6201 by the create_mem_ref heuristics anyway. */
6202 if (cand->iv->base_object)
6203 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6205 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6206 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6207 reference_alias_ptr_type (*use->op_p),
6208 iv, base_hint, data->speed);
6209 copy_ref_info (ref, *use->op_p);
6210 *use->op_p = ref;
6213 /* Rewrites USE (a condition in which one of the arguments is an iv) using
6214 candidate CAND. */
6216 static void
6217 rewrite_use_compare (struct ivopts_data *data,
6218 struct iv_use *use, struct iv_cand *cand)
6220 tree comp, *var_p, op, bound;
6221 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6222 enum tree_code compare;
6223 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6224 bool ok;
6226 bound = cp->value;
6227 if (bound)
6229 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6230 tree var_type = TREE_TYPE (var);
6231 gimple_seq stmts;
6233 if (dump_file && (dump_flags & TDF_DETAILS))
6235 fprintf (dump_file, "Replacing exit test: ");
6236 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6238 compare = iv_elimination_compare (data, use);
6239 bound = unshare_expr (fold_convert (var_type, bound));
6240 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6241 if (stmts)
6242 gsi_insert_seq_on_edge_immediate (
6243 loop_preheader_edge (data->current_loop),
6244 stmts);
6246 gimple_cond_set_lhs (use->stmt, var);
6247 gimple_cond_set_code (use->stmt, compare);
6248 gimple_cond_set_rhs (use->stmt, op);
6249 return;
6252 /* The induction variable elimination failed; just express the original
6253 giv. */
6254 comp = get_computation (data->current_loop, use, cand);
6255 gcc_assert (comp != NULL_TREE);
6257 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6258 gcc_assert (ok);
6260 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6261 true, GSI_SAME_STMT);
6264 /* Rewrites USE using candidate CAND. */
6266 static void
6267 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6269 switch (use->type)
6271 case USE_NONLINEAR_EXPR:
6272 rewrite_use_nonlinear_expr (data, use, cand);
6273 break;
6275 case USE_ADDRESS:
6276 rewrite_use_address (data, use, cand);
6277 break;
6279 case USE_COMPARE:
6280 rewrite_use_compare (data, use, cand);
6281 break;
6283 default:
6284 gcc_unreachable ();
6287 update_stmt (use->stmt);
6290 /* Rewrite the uses using the selected induction variables. */
6292 static void
6293 rewrite_uses (struct ivopts_data *data)
6295 unsigned i;
6296 struct iv_cand *cand;
6297 struct iv_use *use;
6299 for (i = 0; i < n_iv_uses (data); i++)
6301 use = iv_use (data, i);
6302 cand = use->selected;
6303 gcc_assert (cand);
6305 rewrite_use (data, use, cand);
6309 /* Removes the ivs that are not used after rewriting. */
6311 static void
6312 remove_unused_ivs (struct ivopts_data *data)
6314 unsigned j;
6315 bitmap_iterator bi;
6316 bitmap toremove = BITMAP_ALLOC (NULL);
6318 /* Figure out an order in which to release SSA DEFs so that we don't
6319 release something that we'd have to propagate into a debug stmt
6320 afterwards. */
6321 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6323 struct version_info *info;
6325 info = ver_info (data, j);
6326 if (info->iv
6327 && !integer_zerop (info->iv->step)
6328 && !info->inv_id
6329 && !info->iv->have_use_for
6330 && !info->preserve_biv)
6331 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6334 release_defs_bitset (toremove);
6336 BITMAP_FREE (toremove);
/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
		      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
				     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);

  htab_empty (data->inv_expr_tab);
  data->inv_expr_id = 0;
}

/* Finalizes data structures used by the iv optimization pass.  */
static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
  htab_delete (data->inv_expr_tab);
}

/* Returns true if the loop body BODY of NUM_NODES blocks includes any
   function calls, not counting builtins classified as inexpensive.  */
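/* An inference from how the result is used, not from the original
   comments: the body_includes_call flag computed from this predicate
   makes the register-pressure cost estimate more pessimistic, since a
   call clobbers the call-used registers and extra induction variables
   become more expensive to keep live across it.  */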
static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}

/* Optimizes the LOOP.  Returns true if anything changed.  */
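/* A hypothetical end-to-end example of the steps below: a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   may, if the cost model favors it, end up iterating on a single pointer
   induction variable, conceptually

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   with the separate counter and its comparison eliminated.  */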
static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

 finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */
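/* A usage note inferred from the pass machinery rather than stated here:
   this entry point runs from the tree loop optimizer pass pipeline and
   is controlled by the -fivopts flag.  */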
void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}