/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

   All the costs are defined in a machine-specific way, using the target
   hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
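
/* As a rough illustration (a hand-written sketch, not actual output of
   this pass), strength reduction turns the indexed access in

     for (i = 0; i < n; i++)
       a[i] = 0;

   into a pointer that is simply incremented on each iteration:

     for (p = a; p < a + n; p++)
       *p = 0;

   and the original counter i is eliminated when nothing else uses it.  */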
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "output.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations assumed when no estimate is
   available.  */
#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_loop_iterations_int (loop, false);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
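
/* For instance, if profile data estimates a trip count of 100,
   avg_loop_niter returns 100; without an estimate it falls back to the
   assumed AVG_LOOP_NITER value of 5.  */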

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the tree describing the number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit,
		struct tree_niter_desc **desc_p)
{
  struct tree_niter_desc *desc = NULL;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine the number of iterations.  We must know it
	 unconditionally (i.e., without possibility of # of iterations
	 being zero).  Also, we cannot safely work with ssa names that
	 appear in phi nodes on abnormal edges, so that we do not create
	 overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (number_of_iterations_exit (data->current_loop,
				     exit, desc, true)
	  && integer_zerop (desc->may_be_zero)
	  && !contains_abnormal_ssa_name_p (desc->niter))
	niter = desc->niter;
      else
	niter = NULL_TREE;

      desc->niter = niter;
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    niter = ((struct tree_niter_desc *) *slot)->niter;

  if (desc_p)
    *desc_p = (struct tree_niter_desc *) *slot;
  return niter;
}

/* Returns the tree describing the number of iterations determined from
   the single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit, NULL);
}

/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *) ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *) ent2;

  return operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *) ent;
  return expr->hash;
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
				    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}

/* Allocates an induction variable with given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = fold_convert (sizetype, step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter, TDF_SLIM);
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, to avoid showing it in the dumps, if it was not
     reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
	return false;
    }

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
			      precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
	return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
			      precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
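
/* A sketch of the recursion (illustrative, not compiler output): for
   TOP == (x + 2) * 4 and BOT == x + 2, the MULT_EXPR case recurses on
   x + 2, which matches BOT via operand_equal_p, so *MUL ends up as
   1 * 4 = 4.  For TOP == x + 1 and BOT == x, the PLUS_EXPR case fails,
   because the constant 1 is not a constant multiple of x.  */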

/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

static bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt,
			       tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside an address, we might strip the top level component references,
     thus changing the type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
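
/* Roughly (an illustrative sketch, not compiler output): for
   EXPR == p + 7 the POINTER_PLUS_EXPR case peels off the constant,
   returning p with *OFFSET == 7; for EXPR == &a[5] with 4-byte array
   elements, the ADDR_EXPR/ARRAY_REF path returns the address of the
   start of "a" with *OFFSET == 20.  */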

/* Returns a variant of TYPE that can be used as a base for different uses.
   We return an unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
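
/* E.g. "int" yields "unsigned int", an already unsigned type is returned
   unchanged, and a pointer type yields the unsigned integer type of the
   same precision provided by unsigned_type_for.  */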

/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}

/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (base))
	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
	break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
2287 /* Returns true if incrementing the induction variable at the end of the LOOP
2288 is allowed.
2290 The purpose is to avoid splitting the latch edge with a biv increment,
2291 thus creating a jump, possibly confusing other optimization passes and
2292 leaving less freedom to the scheduler. So we allow IP_END_POS only if
2293 IP_NORMAL_POS is not available (so we do not have a better alternative),
2294 or if the latch edge is already nonempty. */
2296 static bool
2297 allow_ip_end_pos_p (struct loop *loop)
2299 if (!ip_normal_pos (loop))
2300 return true;
2302 if (!empty_block_p (ip_end_pos (loop)))
2303 return true;
2305 return false;
2308 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2309 Important field is set to IMPORTANT. */
2311 static void
2312 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2313 bool important, struct iv_use *use)
2315 basic_block use_bb = gimple_bb (use->stmt);
2316 enum machine_mode mem_mode;
2317 unsigned HOST_WIDE_INT cstepi;
2319 /* If we insert the increment in any position other than the standard
2320 ones, we must ensure that it is incremented once per iteration.
2321 It must not be in an inner nested loop, or one side of an if
2322 statement. */
2323 if (use_bb->loop_father != data->current_loop
2324 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2325 || stmt_could_throw_p (use->stmt)
2326 || !cst_and_fits_in_hwi (step))
2327 return;
2329 cstepi = int_cst_value (step);
2331 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2332 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2333 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2335 enum tree_code code = MINUS_EXPR;
2336 tree new_base;
2337 tree new_step = step;
2339 if (POINTER_TYPE_P (TREE_TYPE (base)))
2341 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2342 code = POINTER_PLUS_EXPR;
2344 else
2345 new_step = fold_convert (TREE_TYPE (base), new_step);
2346 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2347 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2348 use->stmt);
2350 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2351 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2353 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2354 use->stmt);
2358 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2359 position to POS. If USE is not NULL, the candidate is set as related to
2360 it. The candidate computation is scheduled on all available positions. */
2362 static void
2363 add_candidate (struct ivopts_data *data,
2364 tree base, tree step, bool important, struct iv_use *use)
2366 if (ip_normal_pos (data->current_loop))
2367 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2368 if (ip_end_pos (data->current_loop)
2369 && allow_ip_end_pos_p (data->current_loop))
2370 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2372 if (use != NULL && use->type == USE_ADDRESS)
2373 add_autoinc_candidates (data, base, step, important, use);
2376 /* Add a standard "0 + 1 * iteration" iv candidate for a
2377 type with SIZE bits. */
2379 static void
2380 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2381 unsigned int size)
2383 tree type = lang_hooks.types.type_for_size (size, true);
2384 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2385 true, NULL);
2388 /* Adds standard iv candidates. */
2390 static void
2391 add_standard_iv_candidates (struct ivopts_data *data)
2393 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2395 /* The same for a double-integer type if it is still fast enough. */
2396 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2397 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
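/* E.g. (illustrative): on a target with 32-bit int and 64-bit words,
   this adds the candidates "0 + 1 * i" in both a 32-bit and a 64-bit
   unsigned type; with 32-bit words, only the 32-bit one is added. */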
2401 /* Adds candidates based on the old induction variable IV. */
2403 static void
2404 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2406 gimple phi;
2407 tree def;
2408 struct iv_cand *cand;
2410 add_candidate (data, iv->base, iv->step, true, NULL);
2412 /* The same, but with initial value zero. */
2413 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2414 add_candidate (data, size_int (0), iv->step, true, NULL);
2415 else
2416 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2417 iv->step, true, NULL);
2419 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2420 if (gimple_code (phi) == GIMPLE_PHI)
2422 /* Additionally record the possibility of leaving the original iv
2423 untouched. */
2424 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2425 cand = add_candidate_1 (data,
2426 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2427 SSA_NAME_DEF_STMT (def));
2428 cand->var_before = iv->ssa_name;
2429 cand->var_after = def;
2433 /* Adds candidates based on the old induction variables. */
2435 static void
2436 add_old_ivs_candidates (struct ivopts_data *data)
2438 unsigned i;
2439 struct iv *iv;
2440 bitmap_iterator bi;
2442 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2444 iv = ver_info (data, i)->iv;
2445 if (iv && iv->biv_p && !integer_zerop (iv->step))
2446 add_old_iv_candidates (data, iv);
2450 /* Adds candidates based on the value of the induction variable IV and USE. */
2452 static void
2453 add_iv_value_candidates (struct ivopts_data *data,
2454 struct iv *iv, struct iv_use *use)
2456 unsigned HOST_WIDE_INT offset;
2457 tree base;
2458 tree basetype;
2460 add_candidate (data, iv->base, iv->step, false, use);
2462 /* The same, but with initial value zero. Make such a variable important,
2463 since it is generic enough that possibly many uses may be based
2464 on it. */
2465 basetype = TREE_TYPE (iv->base);
2466 if (POINTER_TYPE_P (basetype))
2467 basetype = sizetype;
2468 add_candidate (data, build_int_cst (basetype, 0),
2469 iv->step, true, use);
2471 /* Third, try removing the constant offset. Make sure to even
2472 add a candidate for &a[0] vs. (T *)&a. */
2473 base = strip_offset (iv->base, &offset);
2474 if (offset
2475 || base != iv->base)
2476 add_candidate (data, base, iv->step, false, use);
2479 /* Adds candidates based on the uses. */
2481 static void
2482 add_derived_ivs_candidates (struct ivopts_data *data)
2484 unsigned i;
2486 for (i = 0; i < n_iv_uses (data); i++)
2488 struct iv_use *use = iv_use (data, i);
2490 if (!use)
2491 continue;
2493 switch (use->type)
2495 case USE_NONLINEAR_EXPR:
2496 case USE_COMPARE:
2497 case USE_ADDRESS:
2498 /* Just add the ivs based on the value of the iv used here. */
2499 add_iv_value_candidates (data, use->iv, use);
2500 break;
2502 default:
2503 gcc_unreachable ();
2508 /* Record important candidates and add them to related_cands bitmaps
2509 if needed. */
2511 static void
2512 record_important_candidates (struct ivopts_data *data)
2514 unsigned i;
2515 struct iv_use *use;
2517 for (i = 0; i < n_iv_cands (data); i++)
2519 struct iv_cand *cand = iv_cand (data, i);
2521 if (cand->important)
2522 bitmap_set_bit (data->important_candidates, i);
2525 data->consider_all_candidates = (n_iv_cands (data)
2526 <= CONSIDER_ALL_CANDIDATES_BOUND);
2528 if (data->consider_all_candidates)
2530 /* We will not need "related_cands" bitmaps in this case,
2531 so release them to decrease peak memory consumption. */
2532 for (i = 0; i < n_iv_uses (data); i++)
2534 use = iv_use (data, i);
2535 BITMAP_FREE (use->related_cands);
2538 else
2540 /* Add important candidates to the related_cands bitmaps. */
2541 for (i = 0; i < n_iv_uses (data); i++)
2542 bitmap_ior_into (iv_use (data, i)->related_cands,
2543 data->important_candidates);
2547 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2548 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2549 we allocate a simple list for every use. */
2551 static void
2552 alloc_use_cost_map (struct ivopts_data *data)
2554 unsigned i, size, s, j;
2556 for (i = 0; i < n_iv_uses (data); i++)
2558 struct iv_use *use = iv_use (data, i);
2559 bitmap_iterator bi;
2561 if (data->consider_all_candidates)
2562 size = n_iv_cands (data);
2563 else
2565 s = 0;
2566 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2568 s++;
2571 /* Round up to the nearest power of two, so that computing the modulo by it is fast. */
2572 for (size = 1; size < s; size <<= 1)
2573 continue;
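/* Illustrative: for s == 5 related candidates the loop tries
   1, 2, 4, 8 and settles on size == 8, so that the hash lookup in
   set_use_iv_cost can compute "cand->id & (size - 1)" instead of a
   division. */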
2576 use->n_map_members = size;
2577 use->cost_map = XCNEWVEC (struct cost_pair, size);
2581 /* Returns description of computation cost of expression whose runtime
2582 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2584 static comp_cost
2585 new_cost (unsigned runtime, unsigned complexity)
2587 comp_cost cost;
2589 cost.cost = runtime;
2590 cost.complexity = complexity;
2592 return cost;
2595 /* Adds costs COST1 and COST2. */
2597 static comp_cost
2598 add_costs (comp_cost cost1, comp_cost cost2)
2600 cost1.cost += cost2.cost;
2601 cost1.complexity += cost2.complexity;
2603 return cost1;
2605 /* Subtracts costs COST1 and COST2. */
2607 static comp_cost
2608 sub_costs (comp_cost cost1, comp_cost cost2)
2610 cost1.cost -= cost2.cost;
2611 cost1.complexity -= cost2.complexity;
2613 return cost1;
2616 /* Returns a negative number if COST1 < COST2, a positive number if
2617 COST1 > COST2, and 0 if COST1 = COST2. */
2619 static int
2620 compare_costs (comp_cost cost1, comp_cost cost2)
2622 if (cost1.cost == cost2.cost)
2623 return cost1.complexity - cost2.complexity;
2625 return cost1.cost - cost2.cost;
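/* Example (illustrative): {cost = 4, complexity = 1} compares smaller
   than {cost = 4, complexity = 2}, and any finite cost compares
   smaller than infinite_cost, whose cost field is INFTY. */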
2628 /* Returns true if COST is infinite. */
2630 static bool
2631 infinite_cost_p (comp_cost cost)
2633 return cost.cost == INFTY;
2636 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2637 on invariants DEPENDS_ON and that the value used in expressing it
2638 is VALUE. */
2640 static void
2641 set_use_iv_cost (struct ivopts_data *data,
2642 struct iv_use *use, struct iv_cand *cand,
2643 comp_cost cost, bitmap depends_on, tree value,
2644 int inv_expr_id)
2646 unsigned i, s;
2648 if (infinite_cost_p (cost))
2650 BITMAP_FREE (depends_on);
2651 return;
2654 if (data->consider_all_candidates)
2656 use->cost_map[cand->id].cand = cand;
2657 use->cost_map[cand->id].cost = cost;
2658 use->cost_map[cand->id].depends_on = depends_on;
2659 use->cost_map[cand->id].value = value;
2660 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2661 return;
2664 /* n_map_members is a power of two, so this computes modulo. */
2665 s = cand->id & (use->n_map_members - 1);
2666 for (i = s; i < use->n_map_members; i++)
2667 if (!use->cost_map[i].cand)
2668 goto found;
2669 for (i = 0; i < s; i++)
2670 if (!use->cost_map[i].cand)
2671 goto found;
2673 gcc_unreachable ();
2675 found:
2676 use->cost_map[i].cand = cand;
2677 use->cost_map[i].cost = cost;
2678 use->cost_map[i].depends_on = depends_on;
2679 use->cost_map[i].value = value;
2680 use->cost_map[i].inv_expr_id = inv_expr_id;
2683 /* Gets cost of (USE, CANDIDATE) pair. */
2685 static struct cost_pair *
2686 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2687 struct iv_cand *cand)
2689 unsigned i, s;
2690 struct cost_pair *ret;
2692 if (!cand)
2693 return NULL;
2695 if (data->consider_all_candidates)
2697 ret = use->cost_map + cand->id;
2698 if (!ret->cand)
2699 return NULL;
2701 return ret;
2704 /* n_map_members is a power of two, so this computes modulo. */
2705 s = cand->id & (use->n_map_members - 1);
2706 for (i = s; i < use->n_map_members; i++)
2707 if (use->cost_map[i].cand == cand)
2708 return use->cost_map + i;
2710 for (i = 0; i < s; i++)
2711 if (use->cost_map[i].cand == cand)
2712 return use->cost_map + i;
2714 return NULL;
2717 /* Returns estimate on cost of computing SEQ. */
2719 static unsigned
2720 seq_cost (rtx seq, bool speed)
2722 unsigned cost = 0;
2723 rtx set;
2725 for (; seq; seq = NEXT_INSN (seq))
2727 set = single_set (seq);
2728 if (set)
2729 cost += rtx_cost (set, SET, speed);
2730 else
2731 cost++;
2734 return cost;
2737 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2738 static rtx
2739 produce_memory_decl_rtl (tree obj, int *regno)
2741 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2742 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2743 rtx x;
2745 gcc_assert (obj);
2746 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2748 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2749 x = gen_rtx_SYMBOL_REF (address_mode, name);
2750 SET_SYMBOL_REF_DECL (x, obj);
2751 x = gen_rtx_MEM (DECL_MODE (obj), x);
2752 set_mem_addr_space (x, as);
2753 targetm.encode_section_info (obj, x, true);
2755 else
2757 x = gen_raw_REG (address_mode, (*regno)++);
2758 x = gen_rtx_MEM (DECL_MODE (obj), x);
2759 set_mem_addr_space (x, as);
2762 return x;
2765 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2766 walk_tree. DATA contains the actual fake register number. */
2768 static tree
2769 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2771 tree obj = NULL_TREE;
2772 rtx x = NULL_RTX;
2773 int *regno = (int *) data;
2775 switch (TREE_CODE (*expr_p))
2777 case ADDR_EXPR:
2778 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2779 handled_component_p (*expr_p);
2780 expr_p = &TREE_OPERAND (*expr_p, 0))
2781 continue;
2782 obj = *expr_p;
2783 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2784 x = produce_memory_decl_rtl (obj, regno);
2785 break;
2787 case SSA_NAME:
2788 *ws = 0;
2789 obj = SSA_NAME_VAR (*expr_p);
2790 if (!DECL_RTL_SET_P (obj))
2791 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2792 break;
2794 case VAR_DECL:
2795 case PARM_DECL:
2796 case RESULT_DECL:
2797 *ws = 0;
2798 obj = *expr_p;
2800 if (DECL_RTL_SET_P (obj))
2801 break;
2803 if (DECL_MODE (obj) == BLKmode)
2804 x = produce_memory_decl_rtl (obj, regno);
2805 else
2806 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2808 break;
2810 default:
2811 break;
2814 if (x)
2816 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2817 SET_DECL_RTL (obj, x);
2820 return NULL_TREE;
2823 /* Determines cost of the computation of EXPR. */
2825 static unsigned
2826 computation_cost (tree expr, bool speed)
2828 rtx seq, rslt;
2829 tree type = TREE_TYPE (expr);
2830 unsigned cost;
2831 /* Avoid using hard regs in ways which may be unsupported. */
2832 int regno = LAST_VIRTUAL_REGISTER + 1;
2833 struct cgraph_node *node = cgraph_node (current_function_decl);
2834 enum node_frequency real_frequency = node->frequency;
2836 node->frequency = NODE_FREQUENCY_NORMAL;
2837 crtl->maybe_hot_insn_p = speed;
2838 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2839 start_sequence ();
2840 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2841 seq = get_insns ();
2842 end_sequence ();
2843 default_rtl_profile ();
2844 node->frequency = real_frequency;
2846 cost = seq_cost (seq, speed);
2847 if (MEM_P (rslt))
2848 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2849 TYPE_ADDR_SPACE (type), speed);
2851 return cost;
2854 /* Returns variable containing the value of candidate CAND at statement AT. */
2856 static tree
2857 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2859 if (stmt_after_increment (loop, cand, stmt))
2860 return cand->var_after;
2861 else
2862 return cand->var_before;
2865 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2866 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2868 int
2869 tree_int_cst_sign_bit (const_tree t)
2871 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2872 unsigned HOST_WIDE_INT w;
2874 if (bitno < HOST_BITS_PER_WIDE_INT)
2875 w = TREE_INT_CST_LOW (t);
2876 else
2878 w = TREE_INT_CST_HIGH (t);
2879 bitno -= HOST_BITS_PER_WIDE_INT;
2882 return (w >> bitno) & 1;
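/* Worked example (illustrative): for an 8-bit type, bitno == 7, so a
   negative value such as -128, whose low word has bit 7 set, yields
   (w >> 7) & 1 == 1, while 127 yields 0. */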
2885 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2886 same precision, at least as wide as the precision of TYPE, stores
2887 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2888 type of A and B. */
2890 static tree
2891 determine_common_wider_type (tree *a, tree *b)
2893 tree wider_type = NULL;
2894 tree suba, subb;
2895 tree atype = TREE_TYPE (*a);
2897 if (CONVERT_EXPR_P (*a))
2899 suba = TREE_OPERAND (*a, 0);
2900 wider_type = TREE_TYPE (suba);
2901 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2902 return atype;
2904 else
2905 return atype;
2907 if (CONVERT_EXPR_P (*b))
2909 subb = TREE_OPERAND (*b, 0);
2910 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2911 return atype;
2913 else
2914 return atype;
2916 *a = suba;
2917 *b = subb;
2918 return wider_type;
2921 /* Determines the expression by which USE is expressed from induction variable
2922 CAND at statement AT in LOOP. The expression is stored in a decomposed
2923 form into AFF. Returns false if USE cannot be expressed using CAND. */
2925 static bool
2926 get_computation_aff (struct loop *loop,
2927 struct iv_use *use, struct iv_cand *cand, gimple at,
2928 struct affine_tree_combination *aff)
2930 tree ubase = use->iv->base;
2931 tree ustep = use->iv->step;
2932 tree cbase = cand->iv->base;
2933 tree cstep = cand->iv->step, cstep_common;
2934 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2935 tree common_type, var;
2936 tree uutype;
2937 aff_tree cbase_aff, var_aff;
2938 double_int rat;
2940 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2942 /* We do not have enough precision to express the values of the use. */
2943 return false;
2946 var = var_at_stmt (loop, cand, at);
2947 uutype = unsigned_type_for (utype);
2949 /* If the conversion is not noop, perform it. */
2950 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2952 cstep = fold_convert (uutype, cstep);
2953 cbase = fold_convert (uutype, cbase);
2954 var = fold_convert (uutype, var);
2957 if (!constant_multiple_of (ustep, cstep, &rat))
2958 return false;
2960 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2961 type, we achieve better folding by computing their difference in this
2962 wider type, and cast the result to UUTYPE. We do not need to worry about
2963 overflows, as all the arithmetics will in the end be performed in UUTYPE
2964 anyway. */
2965 common_type = determine_common_wider_type (&ubase, &cbase);
2967 /* use = ubase - ratio * cbase + ratio * var. */
2968 tree_to_aff_combination (ubase, common_type, aff);
2969 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2970 tree_to_aff_combination (var, uutype, &var_aff);
2972 /* We need to shift the value if we are after the increment. */
2973 if (stmt_after_increment (loop, cand, at))
2975 aff_tree cstep_aff;
2977 if (common_type != uutype)
2978 cstep_common = fold_convert (common_type, cstep);
2979 else
2980 cstep_common = cstep;
2982 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2983 aff_combination_add (&cbase_aff, &cstep_aff);
2986 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2987 aff_combination_add (aff, &cbase_aff);
2988 if (common_type != uutype)
2989 aff_combination_convert (aff, uutype);
2991 aff_combination_scale (&var_aff, rat);
2992 aff_combination_add (aff, &var_aff);
2994 return true;
2997 /* Determines the expression by which USE is expressed from induction variable
2998 CAND at statement AT in LOOP. The computation is unshared. */
3000 static tree
3001 get_computation_at (struct loop *loop,
3002 struct iv_use *use, struct iv_cand *cand, gimple at)
3004 aff_tree aff;
3005 tree type = TREE_TYPE (use->iv->base);
3007 if (!get_computation_aff (loop, use, cand, at, &aff))
3008 return NULL_TREE;
3009 unshare_aff_combination (&aff);
3010 return fold_convert (type, aff_combination_to_tree (&aff));
3013 /* Determines the expression by which USE is expressed from induction variable
3014 CAND in LOOP. The computation is unshared. */
3016 static tree
3017 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3019 return get_computation_at (loop, use, cand, use->stmt);
3022 /* Adjust the cost COST for being in loop setup rather than loop body.
3023 If we're optimizing for space, the loop setup overhead is constant;
3024 if we're optimizing for speed, amortize it over the per-iteration cost. */
3025 static unsigned
3026 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3028 if (cost == INFTY)
3029 return cost;
3030 else if (optimize_loop_for_speed_p (data->current_loop))
3031 return cost / avg_loop_niter (data->current_loop);
3032 else
3033 return cost;
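/* E.g. (illustrative): a setup cost of 10 in a loop averaging 5
   iterations is charged as 10/5 == 2 when optimizing for speed (the
   one-time setup is amortized over the iterations), but stays 10 when
   optimizing for size, where code size is paid only once anyway. */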
3036 /* Returns cost of addition in MODE. */
3038 static unsigned
3039 add_cost (enum machine_mode mode, bool speed)
3041 static unsigned costs[NUM_MACHINE_MODES];
3042 rtx seq;
3043 unsigned cost;
3045 if (costs[mode])
3046 return costs[mode];
3048 start_sequence ();
3049 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3050 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3051 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3052 NULL_RTX);
3053 seq = get_insns ();
3054 end_sequence ();
3056 cost = seq_cost (seq, speed);
3057 if (!cost)
3058 cost = 1;
3060 costs[mode] = cost;
3062 if (dump_file && (dump_flags & TDF_DETAILS))
3063 fprintf (dump_file, "Addition in %s costs %d\n",
3064 GET_MODE_NAME (mode), cost);
3065 return cost;
3068 /* Entry in a hashtable of already known costs for multiplication. */
3069 struct mbc_entry
3071 HOST_WIDE_INT cst; /* The constant to multiply by. */
3072 enum machine_mode mode; /* In mode. */
3073 unsigned cost; /* The cost. */
3076 /* Computes the hash value for ENTRY. */
3078 static hashval_t
3079 mbc_entry_hash (const void *entry)
3081 const struct mbc_entry *e = (const struct mbc_entry *) entry;
3083 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3086 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3088 static int
3089 mbc_entry_eq (const void *entry1, const void *entry2)
3091 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3092 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3094 return (e1->mode == e2->mode
3095 && e1->cst == e2->cst);
3098 /* Returns cost of multiplication by constant CST in MODE. */
3100 unsigned
3101 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3103 static htab_t costs;
3104 struct mbc_entry **cached, act;
3105 rtx seq;
3106 unsigned cost;
3108 if (!costs)
3109 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3111 act.mode = mode;
3112 act.cst = cst;
3113 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3114 if (*cached)
3115 return (*cached)->cost;
3117 *cached = XNEW (struct mbc_entry);
3118 (*cached)->mode = mode;
3119 (*cached)->cst = cst;
3121 start_sequence ();
3122 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3123 gen_int_mode (cst, mode), NULL_RTX, 0);
3124 seq = get_insns ();
3125 end_sequence ();
3127 cost = seq_cost (seq, speed);
3129 if (dump_file && (dump_flags & TDF_DETAILS))
3130 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3131 (int) cst, GET_MODE_NAME (mode), cost);
3133 (*cached)->cost = cost;
3135 return cost;
3138 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3139 validity for a memory reference accessing memory of mode MODE in
3140 address space AS. */
3142 DEF_VEC_P (sbitmap);
3143 DEF_VEC_ALLOC_P (sbitmap, heap);
3145 bool
3146 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3147 addr_space_t as)
3149 #define MAX_RATIO 128
3150 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3151 static VEC (sbitmap, heap) *valid_mult_list;
3152 sbitmap valid_mult;
3154 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3155 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3157 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3158 if (!valid_mult)
3160 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3161 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3162 rtx addr;
3163 HOST_WIDE_INT i;
3165 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3166 sbitmap_zero (valid_mult);
3167 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3168 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3170 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3171 if (memory_address_addr_space_p (mode, addr, as))
3172 SET_BIT (valid_mult, i + MAX_RATIO);
3175 if (dump_file && (dump_flags & TDF_DETAILS))
3177 fprintf (dump_file, " allowed multipliers:");
3178 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3179 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3180 fprintf (dump_file, " %d", (int) i);
3181 fprintf (dump_file, "\n");
3182 fprintf (dump_file, "\n");
3185 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3188 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3189 return false;
3191 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
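/* For instance (illustrative): on an x86-like target, index scales of
   1, 2, 4 and 8 are typically valid, so the bits for those ratios are
   set in valid_mult and e.g. ratio == 3 returns false. */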
3194 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3195 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3196 variable is omitted. Compute the cost for a memory reference that accesses
3197 a memory location of mode MEM_MODE in address space AS.
3199 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3200 size of MEM_MODE / RATIO) is available. To make this determination, we
3201 look at the size of the increment to be made, which is given in CSTEP.
3202 CSTEP may be zero if the step is unknown.
3203 STMT_AFTER_INC is true iff the statement we're looking at is after the
3204 increment of the original biv.
3206 TODO -- there must be some better way. This all is quite crude. */
3208 typedef struct
3210 HOST_WIDE_INT min_offset, max_offset;
3211 unsigned costs[2][2][2][2];
3212 } *address_cost_data;
3214 DEF_VEC_P (address_cost_data);
3215 DEF_VEC_ALLOC_P (address_cost_data, heap);
3217 static comp_cost
3218 get_address_cost (bool symbol_present, bool var_present,
3219 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3220 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3221 addr_space_t as, bool speed,
3222 bool stmt_after_inc, bool *may_autoinc)
3224 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3225 static VEC(address_cost_data, heap) *address_cost_data_list;
3226 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3227 address_cost_data data;
3228 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3229 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3230 unsigned cost, acost, complexity;
3231 bool offset_p, ratio_p, autoinc;
3232 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3233 unsigned HOST_WIDE_INT mask;
3234 unsigned bits;
3236 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3237 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3238 data_index + 1);
3240 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3241 if (!data)
3243 HOST_WIDE_INT i;
3244 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3245 HOST_WIDE_INT rat, off;
3246 int old_cse_not_expected;
3247 unsigned sym_p, var_p, off_p, rat_p, add_c;
3248 rtx seq, addr, base;
3249 rtx reg0, reg1;
3251 data = (address_cost_data) xcalloc (1, sizeof (*data));
3253 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3255 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3256 for (i = start; i <= 1 << 20; i <<= 1)
3258 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3259 if (!memory_address_addr_space_p (mem_mode, addr, as))
3260 break;
3262 data->max_offset = i == start ? 0 : i >> 1;
3263 off = data->max_offset;
3265 for (i = start; i <= 1 << 20; i <<= 1)
3267 XEXP (addr, 1) = gen_int_mode (-i, address_mode);
3268 if (!memory_address_addr_space_p (mem_mode, addr, as))
3269 break;
3271 data->min_offset = i == start ? 0 : -(i >> 1);
3273 if (dump_file && (dump_flags & TDF_DETAILS))
3275 fprintf (dump_file, "get_address_cost:\n");
3276 fprintf (dump_file, " min offset %s %d\n",
3277 GET_MODE_NAME (mem_mode),
3278 (int) data->min_offset);
3279 fprintf (dump_file, " max offset %s %d\n",
3280 GET_MODE_NAME (mem_mode),
3281 (int) data->max_offset);
3284 rat = 1;
3285 for (i = 2; i <= MAX_RATIO; i++)
3286 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3288 rat = i;
3289 break;
3292 /* Compute the cost of various addressing modes. */
3293 acost = 0;
3294 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3295 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3297 if (HAVE_PRE_DECREMENT)
3299 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3300 has_predec[mem_mode]
3301 = memory_address_addr_space_p (mem_mode, addr, as);
3303 if (HAVE_POST_DECREMENT)
3305 addr = gen_rtx_POST_DEC (address_mode, reg0);
3306 has_postdec[mem_mode]
3307 = memory_address_addr_space_p (mem_mode, addr, as);
3309 if (HAVE_PRE_INCREMENT)
3311 addr = gen_rtx_PRE_INC (address_mode, reg0);
3312 has_preinc[mem_mode]
3313 = memory_address_addr_space_p (mem_mode, addr, as);
3315 if (HAVE_POST_INCREMENT)
3317 addr = gen_rtx_POST_INC (address_mode, reg0);
3318 has_postinc[mem_mode]
3319 = memory_address_addr_space_p (mem_mode, addr, as);
3321 for (i = 0; i < 16; i++)
3323 sym_p = i & 1;
3324 var_p = (i >> 1) & 1;
3325 off_p = (i >> 2) & 1;
3326 rat_p = (i >> 3) & 1;
3328 addr = reg0;
3329 if (rat_p)
3330 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3331 gen_int_mode (rat, address_mode));
3333 if (var_p)
3334 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3336 if (sym_p)
3338 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3339 /* ??? We can run into trouble with some backends by presenting
3340 it with symbols which haven't been properly passed through
3341 targetm.encode_section_info. By setting the local bit, we
3342 enhance the probability of things working. */
3343 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3345 if (off_p)
3346 base = gen_rtx_fmt_e (CONST, address_mode,
3347 gen_rtx_fmt_ee
3348 (PLUS, address_mode, base,
3349 gen_int_mode (off, address_mode)));
3351 else if (off_p)
3352 base = gen_int_mode (off, address_mode);
3353 else
3354 base = NULL_RTX;
3356 if (base)
3357 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3359 start_sequence ();
3360 /* To avoid splitting addressing modes, pretend that no cse will
3361 follow. */
3362 old_cse_not_expected = cse_not_expected;
3363 cse_not_expected = true;
3364 addr = memory_address_addr_space (mem_mode, addr, as);
3365 cse_not_expected = old_cse_not_expected;
3366 seq = get_insns ();
3367 end_sequence ();
3369 acost = seq_cost (seq, speed);
3370 acost += address_cost (addr, mem_mode, as, speed);
3372 if (!acost)
3373 acost = 1;
3374 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3377 /* On some targets, it is quite expensive to load a symbol into a register,
3378 which makes addresses that contain symbols look much more expensive.
3379 However, the symbol will have to be loaded in any case before the
3380 loop (and quite likely we have it in a register already), so it does not
3381 make much sense to penalize them too heavily. So make some final
3382 tweaks for the SYMBOL_PRESENT modes:
3384 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3385 var is cheaper, use this mode with small penalty.
3386 If VAR_PRESENT is true, try whether the mode with
3387 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3388 if this is the case, use it. */
3389 add_c = add_cost (address_mode, speed);
3390 for (i = 0; i < 8; i++)
3392 var_p = i & 1;
3393 off_p = (i >> 1) & 1;
3394 rat_p = (i >> 2) & 1;
3396 acost = data->costs[0][1][off_p][rat_p] + 1;
3397 if (var_p)
3398 acost += add_c;
3400 if (acost < data->costs[1][var_p][off_p][rat_p])
3401 data->costs[1][var_p][off_p][rat_p] = acost;
3404 if (dump_file && (dump_flags & TDF_DETAILS))
3406 fprintf (dump_file, "Address costs:\n");
3408 for (i = 0; i < 16; i++)
3410 sym_p = i & 1;
3411 var_p = (i >> 1) & 1;
3412 off_p = (i >> 2) & 1;
3413 rat_p = (i >> 3) & 1;
3415 fprintf (dump_file, " ");
3416 if (sym_p)
3417 fprintf (dump_file, "sym + ");
3418 if (var_p)
3419 fprintf (dump_file, "var + ");
3420 if (off_p)
3421 fprintf (dump_file, "cst + ");
3422 if (rat_p)
3423 fprintf (dump_file, "rat * ");
3425 acost = data->costs[sym_p][var_p][off_p][rat_p];
3426 fprintf (dump_file, "index costs %d\n", acost);
3428 if (has_predec[mem_mode] || has_postdec[mem_mode]
3429 || has_preinc[mem_mode] || has_postinc[mem_mode])
3430 fprintf (dump_file, " May include autoinc/dec\n");
3431 fprintf (dump_file, "\n");
3434 VEC_replace (address_cost_data, address_cost_data_list,
3435 data_index, data);
3438 bits = GET_MODE_BITSIZE (address_mode);
3439 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3440 offset &= mask;
3441 if ((offset >> (bits - 1) & 1))
3442 offset |= ~mask;
3443 s_offset = offset;
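/* Illustrative: with a 32-bit address mode, an incoming offset of
   0xfffffff0 has the top bit set after masking, so the "offset |= ~mask"
   step sign-extends it and s_offset becomes -16. */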
3445 autoinc = false;
3446 msize = GET_MODE_SIZE (mem_mode);
3447 autoinc_offset = offset;
3448 if (stmt_after_inc)
3449 autoinc_offset += ratio * cstep;
3450 if (symbol_present || var_present || ratio != 1)
3451 autoinc = false;
3452 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3453 && msize == cstep)
3454 || (has_postdec[mem_mode] && autoinc_offset == 0
3455 && msize == -cstep)
3456 || (has_preinc[mem_mode] && autoinc_offset == msize
3457 && msize == cstep)
3458 || (has_predec[mem_mode] && autoinc_offset == -msize
3459 && msize == -cstep))
3460 autoinc = true;
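/* E.g. (illustrative): a 4-byte access with cstep == 4, ratio == 1,
   no symbol/var part and autoinc_offset == 0 matches the post-increment
   pattern, corresponding to a "*p++" style access on targets with
   HAVE_POST_INCREMENT. */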
3462 cost = 0;
3463 offset_p = (s_offset != 0
3464 && data->min_offset <= s_offset
3465 && s_offset <= data->max_offset);
3466 ratio_p = (ratio != 1
3467 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3469 if (ratio != 1 && !ratio_p)
3470 cost += multiply_by_cost (ratio, address_mode, speed);
3472 if (s_offset && !offset_p && !symbol_present)
3473 cost += add_cost (address_mode, speed);
3475 if (may_autoinc)
3476 *may_autoinc = autoinc;
3477 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3478 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3479 return new_cost (cost + acost, complexity);
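/* Illustrative: for an address of shape "sym + 8 + 4 * i" where the
   offset and ratio are directly supported, the returned complexity is
   3 (symbol, offset and ratio present) and the cost is the cached
   data->costs[1][0][1][1] entry with no extra additions. */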
3482 /* Estimates cost of forcing expression EXPR into a variable. */
3484 static comp_cost
3485 force_expr_to_var_cost (tree expr, bool speed)
3487 static bool costs_initialized = false;
3488 static unsigned integer_cost [2];
3489 static unsigned symbol_cost [2];
3490 static unsigned address_cost [2];
3491 tree op0, op1;
3492 comp_cost cost0, cost1, cost;
3493 enum machine_mode mode;
3495 if (!costs_initialized)
3497 tree type = build_pointer_type (integer_type_node);
3498 tree var, addr;
3499 rtx x;
3500 int i;
3502 var = create_tmp_var_raw (integer_type_node, "test_var");
3503 TREE_STATIC (var) = 1;
3504 x = produce_memory_decl_rtl (var, NULL);
3505 SET_DECL_RTL (var, x);
3507 addr = build1 (ADDR_EXPR, type, var);
3510 for (i = 0; i < 2; i++)
3512 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3513 2000), i);
3515 symbol_cost[i] = computation_cost (addr, i) + 1;
3517 address_cost[i]
3518 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3519 addr,
3520 build_int_cst (sizetype, 2000)), i) + 1;
3521 if (dump_file && (dump_flags & TDF_DETAILS))
3523 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3524 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3525 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3526 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3527 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3528 fprintf (dump_file, "\n");
3532 costs_initialized = true;
3535 STRIP_NOPS (expr);
3537 if (SSA_VAR_P (expr))
3538 return zero_cost;
3540 if (is_gimple_min_invariant (expr))
3542 if (TREE_CODE (expr) == INTEGER_CST)
3543 return new_cost (integer_cost [speed], 0);
3545 if (TREE_CODE (expr) == ADDR_EXPR)
3547 tree obj = TREE_OPERAND (expr, 0);
3549 if (TREE_CODE (obj) == VAR_DECL
3550 || TREE_CODE (obj) == PARM_DECL
3551 || TREE_CODE (obj) == RESULT_DECL)
3552 return new_cost (symbol_cost [speed], 0);
3555 return new_cost (address_cost [speed], 0);
3558 switch (TREE_CODE (expr))
3560 case POINTER_PLUS_EXPR:
3561 case PLUS_EXPR:
3562 case MINUS_EXPR:
3563 case MULT_EXPR:
3564 op0 = TREE_OPERAND (expr, 0);
3565 op1 = TREE_OPERAND (expr, 1);
3566 STRIP_NOPS (op0);
3567 STRIP_NOPS (op1);
3569 if (is_gimple_val (op0))
3570 cost0 = zero_cost;
3571 else
3572 cost0 = force_expr_to_var_cost (op0, speed);
3574 if (is_gimple_val (op1))
3575 cost1 = zero_cost;
3576 else
3577 cost1 = force_expr_to_var_cost (op1, speed);
3579 break;
3581 case NEGATE_EXPR:
3582 op0 = TREE_OPERAND (expr, 0);
3583 STRIP_NOPS (op0);
3584 op1 = NULL_TREE;
3586 if (is_gimple_val (op0))
3587 cost0 = zero_cost;
3588 else
3589 cost0 = force_expr_to_var_cost (op0, speed);
3591 cost1 = zero_cost;
3592 break;
3594 default:
3595 /* Just an arbitrary value, FIXME. */
3596 return new_cost (target_spill_cost[speed], 0);
3599 mode = TYPE_MODE (TREE_TYPE (expr));
3600 switch (TREE_CODE (expr))
3602 case POINTER_PLUS_EXPR:
3603 case PLUS_EXPR:
3604 case MINUS_EXPR:
3605 case NEGATE_EXPR:
3606 cost = new_cost (add_cost (mode, speed), 0);
3607 break;
3609 case MULT_EXPR:
3610 if (cst_and_fits_in_hwi (op0))
3611 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3612 else if (cst_and_fits_in_hwi (op1))
3613 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3614 else
3615 return new_cost (target_spill_cost [speed], 0);
3616 break;
3618 default:
3619 gcc_unreachable ();
3622 cost = add_costs (cost, cost0);
3623 cost = add_costs (cost, cost1);
3625 /* Bound the cost by target_spill_cost. The parts of complicated
3626 computations are often either loop invariant or can at least
3627 be shared between several iv uses, so letting this grow without
3628 limit would not give reasonable results. */
3629 if (cost.cost > (int) target_spill_cost [speed])
3630 cost.cost = target_spill_cost [speed];
3632 return cost;
3635 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3636 invariants the computation depends on. */
3638 static comp_cost
3639 force_var_cost (struct ivopts_data *data,
3640 tree expr, bitmap *depends_on)
3642 if (depends_on)
3644 fd_ivopts_data = data;
3645 walk_tree (&expr, find_depends, depends_on, NULL);
3648 return force_expr_to_var_cost (expr, data->speed);
3651 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3652 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3653 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3654 invariants the computation depends on. */
3656 static comp_cost
3657 split_address_cost (struct ivopts_data *data,
3658 tree addr, bool *symbol_present, bool *var_present,
3659 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3661 tree core;
3662 HOST_WIDE_INT bitsize;
3663 HOST_WIDE_INT bitpos;
3664 tree toffset;
3665 enum machine_mode mode;
3666 int unsignedp, volatilep;
3668 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3669 &unsignedp, &volatilep, false);
3671 if (toffset != 0
3672 || bitpos % BITS_PER_UNIT != 0
3673 || TREE_CODE (core) != VAR_DECL)
3675 *symbol_present = false;
3676 *var_present = true;
3677 fd_ivopts_data = data;
3678 walk_tree (&addr, find_depends, depends_on, NULL);
3679 return new_cost (target_spill_cost[data->speed], 0);
3682 *offset += bitpos / BITS_PER_UNIT;
3683 if (TREE_STATIC (core)
3684 || DECL_EXTERNAL (core))
3686 *symbol_present = true;
3687 *var_present = false;
3688 return zero_cost;
3691 *symbol_present = false;
3692 *var_present = true;
3693 return zero_cost;
3696 /* Estimates cost of expressing difference of addresses E1 - E2 as
3697 var + symbol + offset. The value of offset is added to OFFSET,
3698 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3699 part is missing. DEPENDS_ON is a set of the invariants the computation
3700 depends on. */
3702 static comp_cost
3703 ptr_difference_cost (struct ivopts_data *data,
3704 tree e1, tree e2, bool *symbol_present, bool *var_present,
3705 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3707 HOST_WIDE_INT diff = 0;
3708 aff_tree aff_e1, aff_e2;
3709 tree type;
3711 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3713 if (ptr_difference_const (e1, e2, &diff))
3715 *offset += diff;
3716 *symbol_present = false;
3717 *var_present = false;
3718 return zero_cost;
3721 if (integer_zerop (e2))
3722 return split_address_cost (data, TREE_OPERAND (e1, 0),
3723 symbol_present, var_present, offset, depends_on);
3725 *symbol_present = false;
3726 *var_present = true;
3728 type = signed_type_for (TREE_TYPE (e1));
3729 tree_to_aff_combination (e1, type, &aff_e1);
3730 tree_to_aff_combination (e2, type, &aff_e2);
3731 aff_combination_scale (&aff_e2, double_int_minus_one);
3732 aff_combination_add (&aff_e1, &aff_e2);
3734 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3737 /* Estimates cost of expressing difference E1 - E2 as
3738 var + symbol + offset. The value of offset is added to OFFSET,
3739 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3740 part is missing. DEPENDS_ON is a set of the invariants the computation
3741 depends on. */
3743 static comp_cost
3744 difference_cost (struct ivopts_data *data,
3745 tree e1, tree e2, bool *symbol_present, bool *var_present,
3746 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3748 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3749 unsigned HOST_WIDE_INT off1, off2;
3750 aff_tree aff_e1, aff_e2;
3751 tree type;
3753 e1 = strip_offset (e1, &off1);
3754 e2 = strip_offset (e2, &off2);
3755 *offset += off1 - off2;
3757 STRIP_NOPS (e1);
3758 STRIP_NOPS (e2);
3760 if (TREE_CODE (e1) == ADDR_EXPR)
3761 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3762 offset, depends_on);
3763 *symbol_present = false;
3765 if (operand_equal_p (e1, e2, 0))
3767 *var_present = false;
3768 return zero_cost;
3771 *var_present = true;
3773 if (integer_zerop (e2))
3774 return force_var_cost (data, e1, depends_on);
3776 if (integer_zerop (e1))
3778 comp_cost cost = force_var_cost (data, e2, depends_on);
3779 cost.cost += multiply_by_cost (-1, mode, data->speed);
3780 return cost;
3783 type = signed_type_for (TREE_TYPE (e1));
3784 tree_to_aff_combination (e1, type, &aff_e1);
3785 tree_to_aff_combination (e2, type, &aff_e2);
3786 aff_combination_scale (&aff_e2, double_int_minus_one);
3787 aff_combination_add (&aff_e1, &aff_e2);
3789 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3792 /* Returns true if AFF1 and AFF2 are identical. */
3794 static bool
3795 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3797 unsigned i;
3799 if (aff1->n != aff2->n)
3800 return false;
3802 for (i = 0; i < aff1->n; i++)
3804 if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
3805 return false;
3807 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3808 return false;
3810 return true;
3813 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3814 requires a new compiler generated temporary. Returns -1 otherwise.
3815 ADDRESS_P is a flag indicating if the expression is for address
3816 computation. */
3818 static int
3819 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3820 tree cbase, HOST_WIDE_INT ratio,
3821 bool address_p)
3823 aff_tree ubase_aff, cbase_aff;
3824 tree expr, ub, cb;
3825 struct iv_inv_expr_ent ent;
3826 struct iv_inv_expr_ent **slot;
3828 STRIP_NOPS (ubase);
3829 STRIP_NOPS (cbase);
3830 ub = ubase;
3831 cb = cbase;
3833 if ((TREE_CODE (ubase) == INTEGER_CST)
3834 && (TREE_CODE (cbase) == INTEGER_CST))
3835 return -1;
3837 /* Strips the constant part. */
3838 if (TREE_CODE (ubase) == PLUS_EXPR
3839 || TREE_CODE (ubase) == MINUS_EXPR
3840 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3842 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3843 ubase = TREE_OPERAND (ubase, 0);
3846 /* Strips the constant part. */
3847 if (TREE_CODE (cbase) == PLUS_EXPR
3848 || TREE_CODE (cbase) == MINUS_EXPR
3849 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3851 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3852 cbase = TREE_OPERAND (cbase, 0);
3855 if (address_p)
3857 if (((TREE_CODE (ubase) == SSA_NAME)
3858 || (TREE_CODE (ubase) == ADDR_EXPR
3859 && is_gimple_min_invariant (ubase)))
3860 && (TREE_CODE (cbase) == INTEGER_CST))
3861 return -1;
3863 if (((TREE_CODE (cbase) == SSA_NAME)
3864 || (TREE_CODE (cbase) == ADDR_EXPR
3865 && is_gimple_min_invariant (cbase)))
3866 && (TREE_CODE (ubase) == INTEGER_CST))
3867 return -1;
3870 if (ratio == 1)
3872 if (operand_equal_p (ubase, cbase, 0))
3873 return -1;
3875 if (TREE_CODE (ubase) == ADDR_EXPR
3876 && TREE_CODE (cbase) == ADDR_EXPR)
3878 tree usym, csym;
3880 usym = TREE_OPERAND (ubase, 0);
3881 csym = TREE_OPERAND (cbase, 0);
3882 if (TREE_CODE (usym) == ARRAY_REF)
3884 tree ind = TREE_OPERAND (usym, 1);
3885 if (TREE_CODE (ind) == INTEGER_CST
3886 && host_integerp (ind, 0)
3887 && TREE_INT_CST_LOW (ind) == 0)
3888 usym = TREE_OPERAND (usym, 0);
3890 if (TREE_CODE (csym) == ARRAY_REF)
3892 tree ind = TREE_OPERAND (csym, 1);
3893 if (TREE_CODE (ind) == INTEGER_CST
3894 && host_integerp (ind, 0)
3895 && TREE_INT_CST_LOW (ind) == 0)
3896 csym = TREE_OPERAND (csym, 0);
3898 if (operand_equal_p (usym, csym, 0))
3899 return -1;
3901 /* Now do a more complex comparison. */
3902 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3903 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3904 if (compare_aff_trees (&ubase_aff, &cbase_aff))
3905 return -1;
3908 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3909 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3911 aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
3912 aff_combination_add (&ubase_aff, &cbase_aff);
3913 expr = aff_combination_to_tree (&ubase_aff);
3914 ent.expr = expr;
3915 ent.hash = iterative_hash_expr (expr, 0);
3916 slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3917 &ent, INSERT);
3918 if (*slot)
3919 return (*slot)->id;
3921 *slot = XNEW (struct iv_inv_expr_ent);
3922 (*slot)->expr = expr;
3923 (*slot)->hash = ent.hash;
3924 (*slot)->id = data->inv_expr_id++;
3925 return (*slot)->id;
3930 /* Determines the cost of the computation by which USE is expressed
3931 from induction variable CAND. If ADDRESS_P is true, we just need
3932 to create an address from it, otherwise we want to get it into a
3933 register. A set of invariants we depend on is stored in
3934 DEPENDS_ON. AT is the statement at which the value is computed.
3935 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3936 addressing is likely. */
3938 static comp_cost
3939 get_computation_cost_at (struct ivopts_data *data,
3940 struct iv_use *use, struct iv_cand *cand,
3941 bool address_p, bitmap *depends_on, gimple at,
3942 bool *can_autoinc,
3943 int *inv_expr_id)
3945 tree ubase = use->iv->base, ustep = use->iv->step;
3946 tree cbase, cstep;
3947 tree utype = TREE_TYPE (ubase), ctype;
3948 unsigned HOST_WIDE_INT cstepi, offset = 0;
3949 HOST_WIDE_INT ratio, aratio;
3950 bool var_present, symbol_present, stmt_is_after_inc;
3951 comp_cost cost;
3952 double_int rat;
3953 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3955 *depends_on = NULL;
3957 /* Only consider real candidates. */
3958 if (!cand->iv)
3959 return infinite_cost;
3961 cbase = cand->iv->base;
3962 cstep = cand->iv->step;
3963 ctype = TREE_TYPE (cbase);
3965 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3967 /* We do not have enough precision to express the values of the use. */
3968 return infinite_cost;
3971 if (address_p)
3973 /* Do not try to express the address of an object with a computation
3974 based on the address of a different object. This may cause problems
3975 in RTL-level alias analysis (which does not expect this to happen,
3976 as it is illegal in C), and would be unlikely to be useful
3977 anyway. */
3978 if (use->iv->base_object
3979 && cand->iv->base_object
3980 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3981 return infinite_cost;
3984 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3986 /* TODO -- add direct handling of this case. */
3987 goto fallback;
3990 /* CSTEPI is removed from the offset in case the statement is after the
3991 increment. If the step is not constant, we use zero instead.
3992 This is a bit imprecise (there is the extra addition), but
3993 redundancy elimination is likely to transform the code so that
3994 it uses the value of the variable before the increment anyway,
3995 so this is not too unrealistic. */
3996 if (cst_and_fits_in_hwi (cstep))
3997 cstepi = int_cst_value (cstep);
3998 else
3999 cstepi = 0;
4001 if (!constant_multiple_of (ustep, cstep, &rat))
4002 return infinite_cost;
4004 if (double_int_fits_in_shwi_p (rat))
4005 ratio = double_int_to_shwi (rat);
4006 else
4007 return infinite_cost;
4009 STRIP_NOPS (cbase);
4010 ctype = TREE_TYPE (cbase);
4012 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4013 or ratio == 1, it is better to handle this like
4015 ubase - ratio * cbase + ratio * var
4017 (also holds in the case ratio == -1, TODO). */
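/* Worked example (illustrative): for ratio == 1, ubase == &a[4] and
   cbase == &a[0] with 4-byte elements, the rewritten form is
   (&a[4] - &a[0]) + var == var + 16, so the difference folds to a
   constant and the use costs a single addition. */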
4019 if (cst_and_fits_in_hwi (cbase))
4021 offset = - ratio * int_cst_value (cbase);
4022 cost = difference_cost (data,
4023 ubase, build_int_cst (utype, 0),
4024 &symbol_present, &var_present, &offset,
4025 depends_on);
4026 cost.cost /= avg_loop_niter (data->current_loop);
4028 else if (ratio == 1)
4030 cost = difference_cost (data,
4031 ubase, cbase,
4032 &symbol_present, &var_present, &offset,
4033 depends_on);
4034 cost.cost /= avg_loop_niter (data->current_loop);
4036 else if (address_p
4037 && !POINTER_TYPE_P (ctype)
4038 && multiplier_allowed_in_address_p
4039 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4040 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4042 cbase
4043 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4044 cost = difference_cost (data,
4045 ubase, cbase,
4046 &symbol_present, &var_present, &offset,
4047 depends_on);
4048 cost.cost /= avg_loop_niter (data->current_loop);
4050 else
4052 cost = force_var_cost (data, cbase, depends_on);
4053 cost = add_costs (cost,
4054 difference_cost (data,
4055 ubase, build_int_cst (utype, 0),
4056 &symbol_present, &var_present,
4057 &offset, depends_on));
4058 cost.cost /= avg_loop_niter (data->current_loop);
4059 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
4062 if (inv_expr_id)
4064 *inv_expr_id =
4065 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4066 /* Clear depends on. */
4067 if (*inv_expr_id != -1 && depends_on && *depends_on)
4068 bitmap_clear (*depends_on);
4071 /* If we are after the increment, the value of the candidate is higher by
4072 one iteration. */
4073 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4074 if (stmt_is_after_inc)
4075 offset -= ratio * cstepi;
4077 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4078 (symbol/var1/const parts may be omitted). If we are looking for an
4079 address, find the cost of addressing this. */
4080 if (address_p)
4081 return add_costs (cost,
4082 get_address_cost (symbol_present, var_present,
4083 offset, ratio, cstepi,
4084 TYPE_MODE (TREE_TYPE (utype)),
4085 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4086 speed, stmt_is_after_inc,
4087 can_autoinc));
4089 /* Otherwise estimate the costs for computing the expression. */
4090 if (!symbol_present && !var_present && !offset)
4092 if (ratio != 1)
4093 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
4094 return cost;
4097 /* Symbol + offset should be compile-time computable so consider that they
4098 are added once to the variable, if present. */
4099 if (var_present && (symbol_present || offset))
4100 cost.cost += adjust_setup_cost (data,
4101 add_cost (TYPE_MODE (ctype), speed));
4103 /* Having offset does not affect runtime cost in case it is added to
4104 symbol, but it increases complexity. */
4105 if (offset)
4106 cost.complexity++;
4108 cost.cost += add_cost (TYPE_MODE (ctype), speed);
4110 aratio = ratio > 0 ? ratio : -ratio;
4111 if (aratio != 1)
4112 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
4113 return cost;
4115 fallback:
4116 if (can_autoinc)
4117 *can_autoinc = false;
4120 /* Just get the expression, expand it and measure the cost. */
4121 tree comp = get_computation_at (data->current_loop, use, cand, at);
4123 if (!comp)
4124 return infinite_cost;
4126 if (address_p)
4127 comp = build_simple_mem_ref (comp);
4129 return new_cost (computation_cost (comp, speed), 0);
4133 /* Determines the cost of the computation by which USE is expressed
4134 from induction variable CAND. If ADDRESS_P is true, we just need
4135 to create an address from it, otherwise we want to get it into a
4136 register. A set of invariants we depend on is stored in
4137 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4138 autoinc addressing is likely. */
4140 static comp_cost
4141 get_computation_cost (struct ivopts_data *data,
4142 struct iv_use *use, struct iv_cand *cand,
4143 bool address_p, bitmap *depends_on,
4144 bool *can_autoinc, int *inv_expr_id)
4146 return get_computation_cost_at (data,
4147 use, cand, address_p, depends_on, use->stmt,
4148 can_autoinc, inv_expr_id);
4151 /* Determines cost of basing replacement of USE on CAND in a generic
4152 expression. */
4154 static bool
4155 determine_use_iv_cost_generic (struct ivopts_data *data,
4156 struct iv_use *use, struct iv_cand *cand)
4158 bitmap depends_on;
4159 comp_cost cost;
4160 int inv_expr_id = -1;
4162 /* The simple case first -- if we need to express value of the preserved
4163 original biv, the cost is 0. This also prevents us from counting the
4164 cost of increment twice -- once at this use and once in the cost of
4165 the candidate. */
4166 if (cand->pos == IP_ORIGINAL
4167 && cand->incremented_at == use->stmt)
4169 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE, -1);
4170 return true;
4173 cost = get_computation_cost (data, use, cand, false, &depends_on,
4174 NULL, &inv_expr_id);
4176 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4177 inv_expr_id);
4179 return !infinite_cost_p (cost);
4182 /* Determines cost of basing replacement of USE on CAND in an address. */
4184 static bool
4185 determine_use_iv_cost_address (struct ivopts_data *data,
4186 struct iv_use *use, struct iv_cand *cand)
4188 bitmap depends_on;
4189 bool can_autoinc;
4190 int inv_expr_id = -1;
4191 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4192 &can_autoinc, &inv_expr_id);
4194 if (cand->ainc_use == use)
4196 if (can_autoinc)
4197 cost.cost -= cand->cost_step;
4198 /* If we generated the candidate solely for exploiting autoincrement
4199 opportunities, and it turns out it can't be used, set the cost to
4200 infinity to make sure we ignore it. */
4201 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4202 cost = infinite_cost;
4204 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4205 inv_expr_id);
4207 return !infinite_cost_p (cost);
4210 /* Computes value of candidate CAND at position AT in iteration NITER, and
4211 stores it to VAL. */
4213 static void
4214 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4215 aff_tree *val)
4217 aff_tree step, delta, nit;
4218 struct iv *iv = cand->iv;
4219 tree type = TREE_TYPE (iv->base);
4220 tree steptype = type;
4221 if (POINTER_TYPE_P (type))
4222 steptype = sizetype;
4224 tree_to_aff_combination (iv->step, steptype, &step);
4225 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4226 aff_combination_convert (&nit, steptype);
4227 aff_combination_mult (&nit, &step, &delta);
4228 if (stmt_after_increment (loop, cand, at))
4229 aff_combination_add (&delta, &step);
4231 tree_to_aff_combination (iv->base, type, val);
4232 aff_combination_add (val, &delta);
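/* For illustration: the value computed above is

     BASE + (NITER + (AT is after the increment ? 1 : 0)) * STEP,

   so with BASE == 0, STEP == 4 and NITER == 10 the candidate's value is
   40 at a position before its increment and 44 after it.  */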
4235 /* Returns period of induction variable iv. */
4237 static tree
4238 iv_period (struct iv *iv)
4240 tree step = iv->step, period, type;
4241 tree pow2div;
4243 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4245 type = unsigned_type_for (TREE_TYPE (step));
4246 /* The period of the iv is lcm (step, type_range)/step - 1,
4247 i.e., N*type_range/step - 1. Since the type range is a power
4248 of two, N == step >> num_of_ending_zeros_binary (step),
4249 so the final result is
4251 (type_range >> num_of_ending_zeros_binary (step)) - 1
4254 pow2div = num_ending_zeros (step);
4256 period = build_low_bits_mask (type,
4257 (TYPE_PRECISION (type)
4258 - tree_low_cst (pow2div, 1)));
4260 return period;
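/* For illustration: for STEP == 4 in a 32-bit unsigned type there are two
   trailing zero bits, so the period is (2^32 >> 2) - 1 == 2^30 - 1; the
   iv assumes 2^30 distinct values before it wraps around.  */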
4263 /* Returns the comparison operator used when eliminating the iv USE. */
4265 static enum tree_code
4266 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4268 struct loop *loop = data->current_loop;
4269 basic_block ex_bb;
4270 edge exit;
4272 ex_bb = gimple_bb (use->stmt);
4273 exit = EDGE_SUCC (ex_bb, 0);
4274 if (flow_bb_inside_loop_p (loop, exit->dest))
4275 exit = EDGE_SUCC (ex_bb, 1);
4277 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
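/* Informal rationale: the candidate first reaches the bound exactly in
   the exiting iteration (may_eliminate_iv checks that it cannot wrap
   around earlier).  If the loop is left when the condition is true, the
   replacement test must be true exactly then, hence VAR == BOUND;
   otherwise the loop is left when the condition is false, and we need
   VAR != BOUND.  */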
4280 /* Check whether it is possible to express the condition in USE by comparison
4281 of candidate CAND. If so, store the value to compare against in BOUND. */
4283 static bool
4284 may_eliminate_iv (struct ivopts_data *data,
4285 struct iv_use *use, struct iv_cand *cand, tree *bound)
4287 basic_block ex_bb;
4288 edge exit;
4289 tree nit, period;
4290 struct loop *loop = data->current_loop;
4291 aff_tree bnd;
4292 struct tree_niter_desc *desc = NULL;
4294 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4295 return false;
4297 /* For now this works only for exits that dominate the loop latch.
4298 TODO: extend to other conditions inside loop body. */
4299 ex_bb = gimple_bb (use->stmt);
4300 if (use->stmt != last_stmt (ex_bb)
4301 || gimple_code (use->stmt) != GIMPLE_COND
4302 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4303 return false;
4305 exit = EDGE_SUCC (ex_bb, 0);
4306 if (flow_bb_inside_loop_p (loop, exit->dest))
4307 exit = EDGE_SUCC (ex_bb, 1);
4308 if (flow_bb_inside_loop_p (loop, exit->dest))
4309 return false;
4311 nit = niter_for_exit (data, exit, &desc);
4312 if (!nit)
4313 return false;
4315 /* Determine whether we can use the variable to test the exit condition.
4316 This is the case iff the period of the induction variable is greater
4317 than the number of iterations for which the exit condition is true. */
4318 period = iv_period (cand->iv);
4320 /* If the number of iterations is constant, compare against it directly. */
4321 if (TREE_CODE (nit) == INTEGER_CST)
4323 /* See cand_value_at. */
4324 if (stmt_after_increment (loop, cand, use->stmt))
4326 if (!tree_int_cst_lt (nit, period))
4327 return false;
4329 else
4331 if (tree_int_cst_lt (period, nit))
4332 return false;
4336 /* If not, and if this is the only possible exit of the loop, see whether
4337 we can get a conservative estimate on the number of iterations of the
4338 entire loop and compare against that instead. */
4339 else
4341 double_int period_value, max_niter;
4343 max_niter = desc->max;
4344 if (stmt_after_increment (loop, cand, use->stmt))
4345 max_niter = double_int_add (max_niter, double_int_one);
4346 period_value = tree_to_double_int (period);
4347 if (double_int_ucmp (max_niter, period_value) > 0)
4349 /* See if we can take advantage of inferred loop bound information. */
4350 if (loop_only_exit_p (loop, exit))
4352 if (!estimated_loop_iterations (loop, true, &max_niter))
4353 return false;
4354 /* The loop bound is already adjusted by adding 1. */
4355 if (double_int_ucmp (max_niter, period_value) > 0)
4356 return false;
4358 else
4359 return false;
4363 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4365 *bound = aff_combination_to_tree (&bnd);
4366 /* It is unlikely that computing the number of iterations using division
4367 would be more profitable than keeping the original induction variable. */
4368 if (expression_expensive_p (*bound))
4369 return false;
4370 return true;
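/* For illustration: an 8-bit unsigned candidate with step 1 has period
   255, so for a loop that may run 300 iterations the elimination is
   rejected -- the candidate's value would repeat before the exit and the
   replacement test could fire too early.  */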
4374 /* Determines cost of basing replacement of USE on CAND in a condition. */
4376 static bool
4377 determine_use_iv_cost_condition (struct ivopts_data *data,
4378 struct iv_use *use, struct iv_cand *cand)
4380 tree bound = NULL_TREE;
4381 struct iv *cmp_iv;
4382 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4383 comp_cost elim_cost, express_cost, cost;
4384 bool ok;
4385 int inv_expr_id = -1;
4386 tree *control_var, *bound_cst;
4388 /* Only consider real candidates. */
4389 if (!cand->iv)
4391 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, -1);
4392 return false;
4395 /* Try iv elimination. */
4396 if (may_eliminate_iv (data, use, cand, &bound))
4398 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4399 /* The bound is a loop invariant, so it will be computed
4400 only once. */
4401 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4403 else
4404 elim_cost = infinite_cost;
4406 /* Try expressing the original giv. If it is compared with an invariant,
4407 note that we cannot get rid of it. */
4408 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4409 NULL, &cmp_iv);
4410 gcc_assert (ok);
4412 /* When the condition is a comparison of the candidate IV against
4413 zero, prefer this IV.
4415 TODO: The constant that we're subtracting from the cost should
4416 be target-dependent. This information should be added to the
4417 target costs for each backend. */
4418 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4419 && integer_zerop (*bound_cst)
4420 && (operand_equal_p (*control_var, cand->var_after, 0)
4421 || operand_equal_p (*control_var, cand->var_before, 0)))
4422 elim_cost.cost -= 1;
4424 express_cost = get_computation_cost (data, use, cand, false,
4425 &depends_on_express, NULL,
4426 &inv_expr_id);
4427 fd_ivopts_data = data;
4428 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4430 /* Choose the better approach, preferring the eliminated IV. */
4431 if (compare_costs (elim_cost, express_cost) <= 0)
4433 cost = elim_cost;
4434 depends_on = depends_on_elim;
4435 depends_on_elim = NULL;
4437 else
4439 cost = express_cost;
4440 depends_on = depends_on_express;
4441 depends_on_express = NULL;
4442 bound = NULL_TREE;
4445 set_use_iv_cost (data, use, cand, cost, depends_on, bound, inv_expr_id);
4447 if (depends_on_elim)
4448 BITMAP_FREE (depends_on_elim);
4449 if (depends_on_express)
4450 BITMAP_FREE (depends_on_express);
4452 return !infinite_cost_p (cost);
4455 /* Determines cost of basing replacement of USE on CAND. Returns false
4456 if USE cannot be based on CAND. */
4458 static bool
4459 determine_use_iv_cost (struct ivopts_data *data,
4460 struct iv_use *use, struct iv_cand *cand)
4462 switch (use->type)
4464 case USE_NONLINEAR_EXPR:
4465 return determine_use_iv_cost_generic (data, use, cand);
4467 case USE_ADDRESS:
4468 return determine_use_iv_cost_address (data, use, cand);
4470 case USE_COMPARE:
4471 return determine_use_iv_cost_condition (data, use, cand);
4473 default:
4474 gcc_unreachable ();
4478 /* Return true if get_computation_cost indicates that autoincrement is
4479 a possibility for the pair of USE and CAND, false otherwise. */
4481 static bool
4482 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4483 struct iv_cand *cand)
4485 bitmap depends_on;
4486 bool can_autoinc;
4487 comp_cost cost;
4489 if (use->type != USE_ADDRESS)
4490 return false;
4492 cost = get_computation_cost (data, use, cand, true, &depends_on,
4493 &can_autoinc, NULL);
4495 BITMAP_FREE (depends_on);
4497 return !infinite_cost_p (cost) && can_autoinc;
4500 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4501 use that allows autoincrement, and set their AINC_USE if possible. */
4503 static void
4504 set_autoinc_for_original_candidates (struct ivopts_data *data)
4506 unsigned i, j;
4508 for (i = 0; i < n_iv_cands (data); i++)
4510 struct iv_cand *cand = iv_cand (data, i);
4511 struct iv_use *closest = NULL;
4512 if (cand->pos != IP_ORIGINAL)
4513 continue;
4514 for (j = 0; j < n_iv_uses (data); j++)
4516 struct iv_use *use = iv_use (data, j);
4517 unsigned uid = gimple_uid (use->stmt);
4518 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4519 || uid > gimple_uid (cand->incremented_at))
4520 continue;
4521 if (closest == NULL || uid > gimple_uid (closest->stmt))
4522 closest = use;
4524 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4525 continue;
4526 cand->ainc_use = closest;
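/* Note: CLOSEST above is the use with the largest uid among those in the
   same basic block that do not follow the candidate's increment, i.e.,
   the use nearest to the increment -- the only position at which an
   autoincrement addressing mode could combine the two.  */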
4530 /* Finds the candidates for the induction variables. */
4532 static void
4533 find_iv_candidates (struct ivopts_data *data)
4535 /* Add commonly used ivs. */
4536 add_standard_iv_candidates (data);
4538 /* Add old induction variables. */
4539 add_old_ivs_candidates (data);
4541 /* Add induction variables derived from uses. */
4542 add_derived_ivs_candidates (data);
4544 set_autoinc_for_original_candidates (data);
4546 /* Record the important candidates. */
4547 record_important_candidates (data);
4550 /* Determines costs of basing the use of the iv on an iv candidate. */
4552 static void
4553 determine_use_iv_costs (struct ivopts_data *data)
4555 unsigned i, j;
4556 struct iv_use *use;
4557 struct iv_cand *cand;
4558 bitmap to_clear = BITMAP_ALLOC (NULL);
4560 alloc_use_cost_map (data);
4562 for (i = 0; i < n_iv_uses (data); i++)
4564 use = iv_use (data, i);
4566 if (data->consider_all_candidates)
4568 for (j = 0; j < n_iv_cands (data); j++)
4570 cand = iv_cand (data, j);
4571 determine_use_iv_cost (data, use, cand);
4574 else
4576 bitmap_iterator bi;
4578 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4580 cand = iv_cand (data, j);
4581 if (!determine_use_iv_cost (data, use, cand))
4582 bitmap_set_bit (to_clear, j);
4585 /* Remove the candidates for which the cost is infinite from
4586 the list of related candidates. */
4587 bitmap_and_compl_into (use->related_cands, to_clear);
4588 bitmap_clear (to_clear);
4592 BITMAP_FREE (to_clear);
4594 if (dump_file && (dump_flags & TDF_DETAILS))
4596 fprintf (dump_file, "Use-candidate costs:\n");
4598 for (i = 0; i < n_iv_uses (data); i++)
4600 use = iv_use (data, i);
4602 fprintf (dump_file, "Use %d:\n", i);
4603 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4604 for (j = 0; j < use->n_map_members; j++)
4606 if (!use->cost_map[j].cand
4607 || infinite_cost_p (use->cost_map[j].cost))
4608 continue;
4610 fprintf (dump_file, " %d\t%d\t%d\t",
4611 use->cost_map[j].cand->id,
4612 use->cost_map[j].cost.cost,
4613 use->cost_map[j].cost.complexity);
4614 if (use->cost_map[j].depends_on)
4615 bitmap_print (dump_file,
4616 use->cost_map[j].depends_on, "","");
4617 if (use->cost_map[j].inv_expr_id != -1)
4618 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4619 fprintf (dump_file, "\n");
4622 fprintf (dump_file, "\n");
4624 fprintf (dump_file, "\n");
4628 /* Determines cost of the candidate CAND. */
4630 static void
4631 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4633 comp_cost cost_base;
4634 unsigned cost, cost_step;
4635 tree base;
4637 if (!cand->iv)
4639 cand->cost = 0;
4640 return;
4643 /* There are two costs associated with the candidate -- its increment
4644 and its initialization. The second is almost negligible for any loop
4645 that rolls enough, so we give it only a small weight. */
4647 base = cand->iv->base;
4648 cost_base = force_var_cost (data, base, NULL);
4649 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4651 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4653 /* Prefer the original ivs unless we may gain something by replacing them.
4654 The reason is to make debugging simpler; this is not relevant for
4655 artificial ivs created by other optimization passes. */
4656 if (cand->pos != IP_ORIGINAL
4657 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4658 cost++;
4660 /* Prefer not to insert statements into the latch unless there are some
4661 already (so that we do not create unnecessary jumps). */
4662 if (cand->pos == IP_END
4663 && empty_block_p (ip_end_pos (data->current_loop)))
4664 cost++;
4666 cand->cost = cost;
4667 cand->cost_step = cost_step;
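/* For illustration: the resulting cost is the per-iteration increment
   cost (add_cost) plus the cost of computing BASE adjusted by
   adjust_setup_cost, since the setup is paid only once per loop.  A unit
   penalty is added unless the candidate preserves a user-visible
   original iv, and another one if it would add code to an empty latch.  */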
4670 /* Determines costs of computation of the candidates. */
4672 static void
4673 determine_iv_costs (struct ivopts_data *data)
4675 unsigned i;
4677 if (dump_file && (dump_flags & TDF_DETAILS))
4679 fprintf (dump_file, "Candidate costs:\n");
4680 fprintf (dump_file, " cand\tcost\n");
4683 for (i = 0; i < n_iv_cands (data); i++)
4685 struct iv_cand *cand = iv_cand (data, i);
4687 determine_iv_cost (data, cand);
4689 if (dump_file && (dump_flags & TDF_DETAILS))
4690 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4693 if (dump_file && (dump_flags & TDF_DETAILS))
4694 fprintf (dump_file, "\n");
4697 /* Calculates cost for having SIZE induction variables. */
4699 static unsigned
4700 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4702 /* We add size to the cost, so that we prefer eliminating ivs
4703 if possible. */
4704 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
4705 data->body_includes_call);
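/* Roughly speaking (see estimate_reg_pressure_cost): the estimate stays
   flat while SIZE plus the regs_used invariants still fit into the
   available hard registers and starts charging spill costs once they do
   not; the "cost for size" table dumped by determine_set_costs below
   shows the resulting curve for sizes 0 .. 2 * target_avail_regs.  */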
4708 /* For each size of the induction variable set determine the penalty. */
4710 static void
4711 determine_set_costs (struct ivopts_data *data)
4713 unsigned j, n;
4714 gimple phi;
4715 gimple_stmt_iterator psi;
4716 tree op;
4717 struct loop *loop = data->current_loop;
4718 bitmap_iterator bi;
4720 if (dump_file && (dump_flags & TDF_DETAILS))
4722 fprintf (dump_file, "Global costs:\n");
4723 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4724 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
4725 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4726 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4729 n = 0;
4730 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4732 phi = gsi_stmt (psi);
4733 op = PHI_RESULT (phi);
4735 if (!is_gimple_reg (op))
4736 continue;
4738 if (get_iv (data, op))
4739 continue;
4741 n++;
4744 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4746 struct version_info *info = ver_info (data, j);
4748 if (info->inv_id && info->has_nonlin_use)
4749 n++;
4752 data->regs_used = n;
4753 if (dump_file && (dump_flags & TDF_DETAILS))
4754 fprintf (dump_file, " regs_used %d\n", n);
4756 if (dump_file && (dump_flags & TDF_DETAILS))
4758 fprintf (dump_file, " cost for size:\n");
4759 fprintf (dump_file, " ivs\tcost\n");
4760 for (j = 0; j <= 2 * target_avail_regs; j++)
4761 fprintf (dump_file, " %d\t%d\n", j,
4762 ivopts_global_cost_for_size (data, j));
4763 fprintf (dump_file, "\n");
4767 /* Returns true if A is a cheaper cost pair than B. */
4769 static bool
4770 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4772 int cmp;
4774 if (!a)
4775 return false;
4777 if (!b)
4778 return true;
4780 cmp = compare_costs (a->cost, b->cost);
4781 if (cmp < 0)
4782 return true;
4784 if (cmp > 0)
4785 return false;
4787 /* In case the costs are the same, prefer the cheaper candidate. */
4788 if (a->cand->cost < b->cand->cost)
4789 return true;
4791 return false;
4795 /* Returns the cost pair of the candidate by which USE is expressed in IVS. */
4797 static struct cost_pair *
4798 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4800 return ivs->cand_for_use[use->id];
4804 /* Returns the number of temps needed for new loop invariant
4805 expressions. */
4807 static int
4808 iv_ca_get_num_inv_exprs (struct ivopts_data *data, struct iv_ca *ivs)
4810 unsigned i, n = 0;
4811 unsigned *used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
4813 for (i = 0; i < ivs->upto; i++)
4815 struct iv_use *use = iv_use (data, i);
4816 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
4817 if (cp && cp->inv_expr_id != -1)
4819 used_inv_expr[cp->inv_expr_id]++;
4820 if (used_inv_expr[cp->inv_expr_id] == 1)
4821 n++;
4825 free (used_inv_expr);
4826 return n;
4829 /* Computes the cost field of IVS structure. */
4831 static void
4832 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4834 unsigned n_inv_exprs = 0;
4835 comp_cost cost = ivs->cand_use_cost;
4837 cost.cost += ivs->cand_cost;
4839 n_inv_exprs = iv_ca_get_num_inv_exprs (data, ivs);
4840 cost.cost += ivopts_global_cost_for_size (data,
4841 ivs->n_regs + n_inv_exprs);
4843 ivs->cost = cost;
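/* For illustration: the total computed above is

     cand_use_cost  (sum of the chosen cost pairs over all uses)
   + cand_cost      (sum of the selected candidates' own costs)
   + ivopts_global_cost_for_size (n_regs + n_inv_exprs),

   so the temporaries needed for new loop invariant expressions count
   towards register pressure just like the candidate registers.  */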
4846 /* Remove invariants in set INVS from set IVS. */
4848 static void
4849 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4851 bitmap_iterator bi;
4852 unsigned iid;
4854 if (!invs)
4855 return;
4857 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4859 ivs->n_invariant_uses[iid]--;
4860 if (ivs->n_invariant_uses[iid] == 0)
4861 ivs->n_regs--;
4865 /* Set USE not to be expressed by any candidate in IVS. */
4867 static void
4868 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4869 struct iv_use *use)
4871 unsigned uid = use->id, cid;
4872 struct cost_pair *cp;
4874 cp = ivs->cand_for_use[uid];
4875 if (!cp)
4876 return;
4877 cid = cp->cand->id;
4879 ivs->bad_uses++;
4880 ivs->cand_for_use[uid] = NULL;
4881 ivs->n_cand_uses[cid]--;
4883 if (ivs->n_cand_uses[cid] == 0)
4885 bitmap_clear_bit (ivs->cands, cid);
4886 /* Do not count the pseudocandidates. */
4887 if (cp->cand->iv)
4888 ivs->n_regs--;
4889 ivs->n_cands--;
4890 ivs->cand_cost -= cp->cand->cost;
4892 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4895 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4897 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4898 iv_ca_recount_cost (data, ivs);
4901 /* Add invariants in set INVS to set IVS. */
4903 static void
4904 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4906 bitmap_iterator bi;
4907 unsigned iid;
4909 if (!invs)
4910 return;
4912 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4914 ivs->n_invariant_uses[iid]++;
4915 if (ivs->n_invariant_uses[iid] == 1)
4916 ivs->n_regs++;
4920 /* Set cost pair for USE in set IVS to CP. */
4922 static void
4923 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4924 struct iv_use *use, struct cost_pair *cp)
4926 unsigned uid = use->id, cid;
4928 if (ivs->cand_for_use[uid] == cp)
4929 return;
4931 if (ivs->cand_for_use[uid])
4932 iv_ca_set_no_cp (data, ivs, use);
4934 if (cp)
4936 cid = cp->cand->id;
4938 ivs->bad_uses--;
4939 ivs->cand_for_use[uid] = cp;
4940 ivs->n_cand_uses[cid]++;
4941 if (ivs->n_cand_uses[cid] == 1)
4943 bitmap_set_bit (ivs->cands, cid);
4944 /* Do not count the pseudocandidates. */
4945 if (cp->cand->iv)
4946 ivs->n_regs++;
4947 ivs->n_cands++;
4948 ivs->cand_cost += cp->cand->cost;
4950 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4953 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4954 iv_ca_set_add_invariants (ivs, cp->depends_on);
4955 iv_ca_recount_cost (data, ivs);
4959 /* Extend set IVS by expressing USE by some of the candidates in it
4960 if possible. All important candidates will be considered
4961 if IMPORTANT_CANDIDATES is true. */
4963 static void
4964 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4965 struct iv_use *use, bool important_candidates)
4967 struct cost_pair *best_cp = NULL, *cp;
4968 bitmap_iterator bi;
4969 bitmap cands;
4970 unsigned i;
4972 gcc_assert (ivs->upto >= use->id);
4974 if (ivs->upto == use->id)
4976 ivs->upto++;
4977 ivs->bad_uses++;
4980 cands = (important_candidates ? data->important_candidates : ivs->cands);
4981 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
4983 struct iv_cand *cand = iv_cand (data, i);
4985 cp = get_use_iv_cost (data, use, cand);
4987 if (cheaper_cost_pair (cp, best_cp))
4988 best_cp = cp;
4991 iv_ca_set_cp (data, ivs, use, best_cp);
4994 /* Get cost for assignment IVS. */
4996 static comp_cost
4997 iv_ca_cost (struct iv_ca *ivs)
4999 /* This was a conditional expression but it triggered a bug in
5000 Sun C 5.5. */
5001 if (ivs->bad_uses)
5002 return infinite_cost;
5003 else
5004 return ivs->cost;
5007 /* Returns true if all dependences of CP are among invariants in IVS. */
5009 static bool
5010 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5012 unsigned i;
5013 bitmap_iterator bi;
5015 if (!cp->depends_on)
5016 return true;
5018 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5020 if (ivs->n_invariant_uses[i] == 0)
5021 return false;
5024 return true;
5027 /* Creates a change expressing USE by NEW_CP instead of OLD_CP and chains
5028 it before NEXT_CHANGE. */
5030 static struct iv_ca_delta *
5031 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5032 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5034 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5036 change->use = use;
5037 change->old_cp = old_cp;
5038 change->new_cp = new_cp;
5039 change->next_change = next_change;
5041 return change;
5044 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5045 are rewritten. */
5047 static struct iv_ca_delta *
5048 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5050 struct iv_ca_delta *last;
5052 if (!l2)
5053 return l1;
5055 if (!l1)
5056 return l2;
5058 for (last = l1; last->next_change; last = last->next_change)
5059 continue;
5060 last->next_change = l2;
5062 return l1;
5065 /* Reverse the list of changes DELTA, forming its inverse. */
5067 static struct iv_ca_delta *
5068 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5070 struct iv_ca_delta *act, *next, *prev = NULL;
5071 struct cost_pair *tmp;
5073 for (act = delta; act; act = next)
5075 next = act->next_change;
5076 act->next_change = prev;
5077 prev = act;
5079 tmp = act->old_cp;
5080 act->old_cp = act->new_cp;
5081 act->new_cp = tmp;
5084 return prev;
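/* Note: besides reversing the chain, OLD_CP and NEW_CP are swapped in
   each entry, so committing the reversed list applies the inverse of the
   original DELTA.  This is what allows iv_ca_delta_commit below to
   revert a tentative change when called with FORWARD == false.  */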
5087 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5088 reverted instead. */
5090 static void
5091 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5092 struct iv_ca_delta *delta, bool forward)
5094 struct cost_pair *from, *to;
5095 struct iv_ca_delta *act;
5097 if (!forward)
5098 delta = iv_ca_delta_reverse (delta);
5100 for (act = delta; act; act = act->next_change)
5102 from = act->old_cp;
5103 to = act->new_cp;
5104 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5105 iv_ca_set_cp (data, ivs, act->use, to);
5108 if (!forward)
5109 iv_ca_delta_reverse (delta);
5112 /* Returns true if CAND is used in IVS. */
5114 static bool
5115 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5117 return ivs->n_cand_uses[cand->id] > 0;
5120 /* Returns number of induction variable candidates in the set IVS. */
5122 static unsigned
5123 iv_ca_n_cands (struct iv_ca *ivs)
5125 return ivs->n_cands;
5128 /* Free the list of changes DELTA. */
5130 static void
5131 iv_ca_delta_free (struct iv_ca_delta **delta)
5133 struct iv_ca_delta *act, *next;
5135 for (act = *delta; act; act = next)
5137 next = act->next_change;
5138 free (act);
5141 *delta = NULL;
5144 /* Allocates a new iv candidate assignment. */
5146 static struct iv_ca *
5147 iv_ca_new (struct ivopts_data *data)
5149 struct iv_ca *nw = XNEW (struct iv_ca);
5151 nw->upto = 0;
5152 nw->bad_uses = 0;
5153 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5154 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5155 nw->cands = BITMAP_ALLOC (NULL);
5156 nw->n_cands = 0;
5157 nw->n_regs = 0;
5158 nw->cand_use_cost = zero_cost;
5159 nw->cand_cost = 0;
5160 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5161 nw->cost = zero_cost;
5163 return nw;
5166 /* Free memory occupied by the set IVS. */
5168 static void
5169 iv_ca_free (struct iv_ca **ivs)
5171 free ((*ivs)->cand_for_use);
5172 free ((*ivs)->n_cand_uses);
5173 BITMAP_FREE ((*ivs)->cands);
5174 free ((*ivs)->n_invariant_uses);
5175 free (*ivs);
5176 *ivs = NULL;
5179 /* Dumps IVS to FILE. */
5181 static void
5182 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5184 const char *pref = " invariants ";
5185 unsigned i;
5186 comp_cost cost = iv_ca_cost (ivs);
5188 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5189 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5190 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5191 bitmap_print (file, ivs->cands, " candidates: ","\n");
5193 for (i = 0; i < ivs->upto; i++)
5195 struct iv_use *use = iv_use (data, i);
5196 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5197 if (cp)
5198 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5199 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5200 else
5201 fprintf (file, " use:%d --> ??\n", use->id);
5204 for (i = 1; i <= data->max_inv_id; i++)
5205 if (ivs->n_invariant_uses[i])
5207 fprintf (file, "%s%d", pref, i);
5208 pref = ", ";
5210 fprintf (file, "\n\n");
5213 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5214 new set, and store differences in DELTA. Number of induction variables
5215 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is
5216 true, the function tries to find a solution with a minimal number of iv candidates. */
5218 static comp_cost
5219 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5220 struct iv_cand *cand, struct iv_ca_delta **delta,
5221 unsigned *n_ivs, bool min_ncand)
5223 unsigned i;
5224 comp_cost cost;
5225 struct iv_use *use;
5226 struct cost_pair *old_cp, *new_cp;
5228 *delta = NULL;
5229 for (i = 0; i < ivs->upto; i++)
5231 use = iv_use (data, i);
5232 old_cp = iv_ca_cand_for_use (ivs, use);
5234 if (old_cp
5235 && old_cp->cand == cand)
5236 continue;
5238 new_cp = get_use_iv_cost (data, use, cand);
5239 if (!new_cp)
5240 continue;
5242 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5243 continue;
5245 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5246 continue;
5248 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5251 iv_ca_delta_commit (data, ivs, *delta, true);
5252 cost = iv_ca_cost (ivs);
5253 if (n_ivs)
5254 *n_ivs = iv_ca_n_cands (ivs);
5255 iv_ca_delta_commit (data, ivs, *delta, false);
5257 return cost;
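/* For illustration: iv_ca_extend uses a tentative-commit pattern -- the
   delta is applied just long enough to measure the new cost and count
   the ivs, then reverted.  The caller compares the returned cost with
   the current one and only then commits *DELTA for real.  */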
5260 /* Try narrowing set IVS by removing CAND. Return the cost of
5261 the new set and store the differences in DELTA. */
5263 static comp_cost
5264 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5265 struct iv_cand *cand, struct iv_ca_delta **delta)
5267 unsigned i, ci;
5268 struct iv_use *use;
5269 struct cost_pair *old_cp, *new_cp, *cp;
5270 bitmap_iterator bi;
5271 struct iv_cand *cnd;
5272 comp_cost cost;
5274 *delta = NULL;
5275 for (i = 0; i < n_iv_uses (data); i++)
5277 use = iv_use (data, i);
5279 old_cp = iv_ca_cand_for_use (ivs, use);
5280 if (old_cp->cand != cand)
5281 continue;
5283 new_cp = NULL;
5285 if (data->consider_all_candidates)
5287 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5289 if (ci == cand->id)
5290 continue;
5292 cnd = iv_cand (data, ci);
5294 cp = get_use_iv_cost (data, use, cnd);
5295 if (!cp)
5296 continue;
5298 if (!iv_ca_has_deps (ivs, cp))
5299 continue;
5301 if (!cheaper_cost_pair (cp, new_cp))
5302 continue;
5304 new_cp = cp;
5307 else
5309 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5311 if (ci == cand->id)
5312 continue;
5314 cnd = iv_cand (data, ci);
5316 cp = get_use_iv_cost (data, use, cnd);
5317 if (!cp)
5318 continue;
5319 if (!iv_ca_has_deps (ivs, cp))
5320 continue;
5322 if (!cheaper_cost_pair (cp, new_cp))
5323 continue;
5325 new_cp = cp;
5329 if (!new_cp)
5331 iv_ca_delta_free (delta);
5332 return infinite_cost;
5335 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5338 iv_ca_delta_commit (data, ivs, *delta, true);
5339 cost = iv_ca_cost (ivs);
5340 iv_ca_delta_commit (data, ivs, *delta, false);
5342 return cost;
5345 /* Try optimizing the set of candidates IVS by removing candidates different
5346 from EXCEPT_CAND from it. Return cost of the new set, and store
5347 differences in DELTA. */
5349 static comp_cost
5350 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5351 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5353 bitmap_iterator bi;
5354 struct iv_ca_delta *act_delta, *best_delta;
5355 unsigned i;
5356 comp_cost best_cost, acost;
5357 struct iv_cand *cand;
5359 best_delta = NULL;
5360 best_cost = iv_ca_cost (ivs);
5362 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5364 cand = iv_cand (data, i);
5366 if (cand == except_cand)
5367 continue;
5369 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5371 if (compare_costs (acost, best_cost) < 0)
5373 best_cost = acost;
5374 iv_ca_delta_free (&best_delta);
5375 best_delta = act_delta;
5377 else
5378 iv_ca_delta_free (&act_delta);
5381 if (!best_delta)
5383 *delta = NULL;
5384 return best_cost;
5387 /* Recurse to possibly remove other unnecessary ivs. */
5388 iv_ca_delta_commit (data, ivs, best_delta, true);
5389 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5390 iv_ca_delta_commit (data, ivs, best_delta, false);
5391 *delta = iv_ca_delta_join (best_delta, *delta);
5392 return best_cost;
5395 /* Tries to extend the set IVS in the best possible way in order
5396 to express the USE. If ORIGINALP is true, prefer candidates from
5397 the original set of IVs, otherwise favor important candidates not
5398 based on any memory object. */
5400 static bool
5401 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5402 struct iv_use *use, bool originalp)
5404 comp_cost best_cost, act_cost;
5405 unsigned i;
5406 bitmap_iterator bi;
5407 struct iv_cand *cand;
5408 struct iv_ca_delta *best_delta = NULL, *act_delta;
5409 struct cost_pair *cp;
5411 iv_ca_add_use (data, ivs, use, false);
5412 best_cost = iv_ca_cost (ivs);
5414 cp = iv_ca_cand_for_use (ivs, use);
5415 if (!cp)
5417 ivs->upto--;
5418 ivs->bad_uses--;
5419 iv_ca_add_use (data, ivs, use, true);
5420 best_cost = iv_ca_cost (ivs);
5421 cp = iv_ca_cand_for_use (ivs, use);
5423 if (cp)
5425 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5426 iv_ca_set_no_cp (data, ivs, use);
5429 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5430 first try important candidates not based on any memory object. Only if
5431 this fails, try the specific ones. Rationale -- in loops with many
5432 variables the best choice often is to use just one generic biv. If we
5433 added here many ivs specific to the uses, the optimization algorithm later
5434 would be likely to get stuck in a local minimum, thus causing us to create
5435 too many ivs. The approach from few ivs to more seems more likely to be
5436 successful -- starting from few ivs, replacing an expensive use by a
5437 specific iv should always be a win. */
5438 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5440 cand = iv_cand (data, i);
5442 if (originalp && cand->pos != IP_ORIGINAL)
5443 continue;
5445 if (!originalp && cand->iv->base_object != NULL_TREE)
5446 continue;
5448 if (iv_ca_cand_used_p (ivs, cand))
5449 continue;
5451 cp = get_use_iv_cost (data, use, cand);
5452 if (!cp)
5453 continue;
5455 iv_ca_set_cp (data, ivs, use, cp);
5456 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5457 true);
5458 iv_ca_set_no_cp (data, ivs, use);
5459 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5461 if (compare_costs (act_cost, best_cost) < 0)
5463 best_cost = act_cost;
5465 iv_ca_delta_free (&best_delta);
5466 best_delta = act_delta;
5468 else
5469 iv_ca_delta_free (&act_delta);
5472 if (infinite_cost_p (best_cost))
5474 for (i = 0; i < use->n_map_members; i++)
5476 cp = use->cost_map + i;
5477 cand = cp->cand;
5478 if (!cand)
5479 continue;
5481 /* Already tried this. */
5482 if (cand->important)
5484 if (originalp && cand->pos == IP_ORIGINAL)
5485 continue;
5486 if (!originalp && cand->iv->base_object == NULL_TREE)
5487 continue;
5490 if (iv_ca_cand_used_p (ivs, cand))
5491 continue;
5493 act_delta = NULL;
5494 iv_ca_set_cp (data, ivs, use, cp);
5495 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5496 iv_ca_set_no_cp (data, ivs, use);
5497 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5498 cp, act_delta);
5500 if (compare_costs (act_cost, best_cost) < 0)
5502 best_cost = act_cost;
5504 if (best_delta)
5505 iv_ca_delta_free (&best_delta);
5506 best_delta = act_delta;
5508 else
5509 iv_ca_delta_free (&act_delta);
5513 iv_ca_delta_commit (data, ivs, best_delta, true);
5514 iv_ca_delta_free (&best_delta);
5516 return !infinite_cost_p (best_cost);
5519 /* Finds an initial assignment of candidates to uses. */
5521 static struct iv_ca *
5522 get_initial_solution (struct ivopts_data *data, bool originalp)
5524 struct iv_ca *ivs = iv_ca_new (data);
5525 unsigned i;
5527 for (i = 0; i < n_iv_uses (data); i++)
5528 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5530 iv_ca_free (&ivs);
5531 return NULL;
5534 return ivs;
5537 /* Tries to improve set of induction variables IVS. */
5539 static bool
5540 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5542 unsigned i, n_ivs;
5543 comp_cost acost, best_cost = iv_ca_cost (ivs);
5544 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5545 struct iv_cand *cand;
5547 /* Try extending the set of induction variables by one. */
5548 for (i = 0; i < n_iv_cands (data); i++)
5550 cand = iv_cand (data, i);
5552 if (iv_ca_cand_used_p (ivs, cand))
5553 continue;
5555 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5556 if (!act_delta)
5557 continue;
5559 /* If we successfully added the candidate and the set is small enough,
5560 try optimizing it by removing other candidates. */
5561 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5563 iv_ca_delta_commit (data, ivs, act_delta, true);
5564 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5565 iv_ca_delta_commit (data, ivs, act_delta, false);
5566 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5569 if (compare_costs (acost, best_cost) < 0)
5571 best_cost = acost;
5572 iv_ca_delta_free (&best_delta);
5573 best_delta = act_delta;
5575 else
5576 iv_ca_delta_free (&act_delta);
5579 if (!best_delta)
5581 /* Try removing the candidates from the set instead. */
5582 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5584 /* Nothing more we can do. */
5585 if (!best_delta)
5586 return false;
5589 iv_ca_delta_commit (data, ivs, best_delta, true);
5590 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5591 iv_ca_delta_free (&best_delta);
5592 return true;
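/* For illustration: the driver loop in find_optimal_iv_set_1 below is
   schematically

     while (try_improve_iv_set (data, set))
       ;

   where each call either extends the set by one candidate (pruning
   afterwards if the set is small enough), or, failing that, just prunes,
   and commits a delta only when it lowers the cost -- a greedy search
   that stops in a local minimum.  */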
5595 /* Attempts to find the optimal set of induction variables. We use a simple
5596 greedy heuristic: we try to replace at most one candidate in the selected
5597 solution and remove the unused ivs while this improves the cost. */
5599 static struct iv_ca *
5600 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5602 struct iv_ca *set;
5604 /* Get the initial solution. */
5605 set = get_initial_solution (data, originalp);
5606 if (!set)
5608 if (dump_file && (dump_flags & TDF_DETAILS))
5609 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5610 return NULL;
5613 if (dump_file && (dump_flags & TDF_DETAILS))
5615 fprintf (dump_file, "Initial set of candidates:\n");
5616 iv_ca_dump (data, dump_file, set);
5619 while (try_improve_iv_set (data, set))
5621 if (dump_file && (dump_flags & TDF_DETAILS))
5623 fprintf (dump_file, "Improved to:\n");
5624 iv_ca_dump (data, dump_file, set);
5628 return set;
5631 static struct iv_ca *
5632 find_optimal_iv_set (struct ivopts_data *data)
5634 unsigned i;
5635 struct iv_ca *set, *origset;
5636 struct iv_use *use;
5637 comp_cost cost, origcost;
5639 /* Determine the cost based on a strategy that starts with original IVs,
5640 and then try again using a strategy that prefers candidates not based
5641 on any IVs. */
5642 origset = find_optimal_iv_set_1 (data, true);
5643 set = find_optimal_iv_set_1 (data, false);
5645 if (!origset && !set)
5646 return NULL;
5648 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5649 cost = set ? iv_ca_cost (set) : infinite_cost;
5651 if (dump_file && (dump_flags & TDF_DETAILS))
5653 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5654 origcost.cost, origcost.complexity);
5655 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5656 cost.cost, cost.complexity);
5659 /* Choose the one with the best cost. */
5660 if (compare_costs (origcost, cost) <= 0)
5662 if (set)
5663 iv_ca_free (&set);
5664 set = origset;
5666 else if (origset)
5667 iv_ca_free (&origset);
5669 for (i = 0; i < n_iv_uses (data); i++)
5671 use = iv_use (data, i);
5672 use->selected = iv_ca_cand_for_use (set, use)->cand;
5675 return set;
5678 /* Creates a new induction variable corresponding to CAND. */
5680 static void
5681 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5683 gimple_stmt_iterator incr_pos;
5684 tree base;
5685 bool after = false;
5687 if (!cand->iv)
5688 return;
5690 switch (cand->pos)
5692 case IP_NORMAL:
5693 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5694 break;
5696 case IP_END:
5697 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5698 after = true;
5699 break;
5701 case IP_AFTER_USE:
5702 after = true;
5703 /* fall through */
5704 case IP_BEFORE_USE:
5705 incr_pos = gsi_for_stmt (cand->incremented_at);
5706 break;
5708 case IP_ORIGINAL:
5709 /* Mark that the iv is preserved. */
5710 name_info (data, cand->var_before)->preserve_biv = true;
5711 name_info (data, cand->var_after)->preserve_biv = true;
5713 /* Rewrite the increment so that it uses var_before directly. */
5714 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5715 return;
5718 gimple_add_tmp_var (cand->var_before);
5719 add_referenced_var (cand->var_before);
5721 base = unshare_expr (cand->iv->base);
5723 create_iv (base, unshare_expr (cand->iv->step),
5724 cand->var_before, data->current_loop,
5725 &incr_pos, after, &cand->var_before, &cand->var_after);
5728 /* Creates new induction variables described in SET. */
5730 static void
5731 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5733 unsigned i;
5734 struct iv_cand *cand;
5735 bitmap_iterator bi;
5737 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5739 cand = iv_cand (data, i);
5740 create_new_iv (data, cand);
5743 if (dump_file && (dump_flags & TDF_DETAILS))
5745 fprintf (dump_file, "\nSelected IV set: \n");
5746 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5748 cand = iv_cand (data, i);
5749 dump_cand (dump_file, cand);
5751 fprintf (dump_file, "\n");
5755 /* Rewrites USE (definition of iv used in a nonlinear expression)
5756 using candidate CAND. */
5758 static void
5759 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5760 struct iv_use *use, struct iv_cand *cand)
5762 tree comp;
5763 tree op, tgt;
5764 gimple ass;
5765 gimple_stmt_iterator bsi;
5767 /* An important special case -- if we are asked to express value of
5768 the original iv by itself, just exit; there is no need to
5769 introduce a new computation (that might also need casting the
5770 variable to unsigned and back). */
5771 if (cand->pos == IP_ORIGINAL
5772 && cand->incremented_at == use->stmt)
5774 tree step, ctype, utype;
5775 enum tree_code incr_code = PLUS_EXPR, old_code;
5777 gcc_assert (is_gimple_assign (use->stmt));
5778 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5780 step = cand->iv->step;
5781 ctype = TREE_TYPE (step);
5782 utype = TREE_TYPE (cand->var_after);
5783 if (TREE_CODE (step) == NEGATE_EXPR)
5785 incr_code = MINUS_EXPR;
5786 step = TREE_OPERAND (step, 0);
5789 /* Check whether we may leave the computation unchanged.
5790 This is the case only if it does not rely on other
5791 computations in the loop -- otherwise, the computation
5792 we rely upon may be removed in remove_unused_ivs,
5793 thus leading to ICE. */
5794 old_code = gimple_assign_rhs_code (use->stmt);
5795 if (old_code == PLUS_EXPR
5796 || old_code == MINUS_EXPR
5797 || old_code == POINTER_PLUS_EXPR)
5799 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5800 op = gimple_assign_rhs2 (use->stmt);
5801 else if (old_code != MINUS_EXPR
5802 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5803 op = gimple_assign_rhs1 (use->stmt);
5804 else
5805 op = NULL_TREE;
5807 else
5808 op = NULL_TREE;
5810 if (op
5811 && (TREE_CODE (op) == INTEGER_CST
5812 || operand_equal_p (op, step, 0)))
5813 return;
5815 /* Otherwise, add the necessary computations to express
5816 the iv. */
5817 op = fold_convert (ctype, cand->var_before);
5818 comp = fold_convert (utype,
5819 build2 (incr_code, ctype, op,
5820 unshare_expr (step)));
5822 else
5824 comp = get_computation (data->current_loop, use, cand);
5825 gcc_assert (comp != NULL_TREE);
5828 switch (gimple_code (use->stmt))
5830 case GIMPLE_PHI:
5831 tgt = PHI_RESULT (use->stmt);
5833 /* If we should keep the biv, do not replace it. */
5834 if (name_info (data, tgt)->preserve_biv)
5835 return;
5837 bsi = gsi_after_labels (gimple_bb (use->stmt));
5838 break;
5840 case GIMPLE_ASSIGN:
5841 tgt = gimple_assign_lhs (use->stmt);
5842 bsi = gsi_for_stmt (use->stmt);
5843 break;
5845 default:
5846 gcc_unreachable ();
5849 if (!valid_gimple_rhs_p (comp)
5850 || (gimple_code (use->stmt) != GIMPLE_PHI
5851 /* We can't allow re-allocating the stmt as it might still be
5852 pointed to. */
5853 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
5854 >= gimple_num_ops (gsi_stmt (bsi)))))
5856 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
5857 true, GSI_SAME_STMT);
5858 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
5859 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
5862 if (gimple_code (use->stmt) == GIMPLE_PHI)
5864 ass = gimple_build_assign (tgt, comp);
5865 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5867 bsi = gsi_for_stmt (use->stmt);
5868 remove_phi_node (&bsi, false);
5870 else
5872 gimple_assign_set_rhs_from_tree (&bsi, comp);
5873 use->stmt = gsi_stmt (bsi);
5877 /* Replaces ssa name in index IDX by its basic variable. Callback for
5878 for_each_index. */
5880 static bool
5881 idx_remove_ssa_names (tree base, tree *idx,
5882 void *data ATTRIBUTE_UNUSED)
5884 tree *op;
5886 if (TREE_CODE (*idx) == SSA_NAME)
5887 *idx = SSA_NAME_VAR (*idx);
5889 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
5891 op = &TREE_OPERAND (base, 2);
5892 if (*op
5893 && TREE_CODE (*op) == SSA_NAME)
5894 *op = SSA_NAME_VAR (*op);
5895 op = &TREE_OPERAND (base, 3);
5896 if (*op
5897 && TREE_CODE (*op) == SSA_NAME)
5898 *op = SSA_NAME_VAR (*op);
5901 return true;
5904 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5906 static tree
5907 unshare_and_remove_ssa_names (tree ref)
5909 ref = unshare_expr (ref);
5910 for_each_index (&ref, idx_remove_ssa_names, NULL);
5912 return ref;
5915 /* Copies the reference information from OLD_REF to NEW_REF. */
5917 static void
5918 copy_ref_info (tree new_ref, tree old_ref)
5920 tree new_ptr_base = NULL_TREE;
5922 if (TREE_CODE (old_ref) == TARGET_MEM_REF
5923 && TREE_CODE (new_ref) == TARGET_MEM_REF)
5924 TMR_ORIGINAL (new_ref) = TMR_ORIGINAL (old_ref);
5925 else if (TREE_CODE (new_ref) == TARGET_MEM_REF)
5926 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5928 TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
5929 TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
5931 if (TREE_CODE (new_ref) == TARGET_MEM_REF)
5932 new_ptr_base = TMR_BASE (new_ref);
5933 else if (TREE_CODE (new_ref) == MEM_REF)
5934 new_ptr_base = TREE_OPERAND (new_ref, 0);
5936 /* We can transfer points-to information from an old pointer
5937 or decl base to the new one. */
5938 if (new_ptr_base
5939 && TREE_CODE (new_ptr_base) == SSA_NAME
5940 && POINTER_TYPE_P (TREE_TYPE (new_ptr_base))
5941 && !SSA_NAME_PTR_INFO (new_ptr_base))
5943 tree base = get_base_address (old_ref);
5944 if (!base)
5946 else if ((INDIRECT_REF_P (base)
5947 || TREE_CODE (base) == MEM_REF)
5948 && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME)
5949 duplicate_ssa_name_ptr_info
5950 (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
5951 else if (TREE_CODE (base) == VAR_DECL
5952 || TREE_CODE (base) == PARM_DECL
5953 || TREE_CODE (base) == RESULT_DECL)
5955 struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
5956 pt_solution_set_var (&pi->pt, base);
5961 /* Performs a peephole optimization to reorder the iv update statement with
5962 a mem ref to enable instruction combining in later phases. The mem ref uses
5963 the iv value before the update, so the reordering transformation requires
5964 adjustment of the offset. CAND is the selected IV_CAND.
5966 Example:
5968 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
5969 iv2 = iv1 + 1;
5971 if (t < val) (1)
5972 goto L;
5973 goto Head;
5976 directly propagating t over to (1) will introduce an overlapping live range
5977 and thus increase register pressure. This peephole transforms it into:
5980 iv2 = iv1 + 1;
5981 t = MEM_REF (base, iv2, 8, 8);
5982 if (t < val)
5983 goto L;
5984 goto Head;
5987 static void
5988 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
5990 tree var_after;
5991 gimple iv_update, stmt;
5992 basic_block bb;
5993 gimple_stmt_iterator gsi, gsi_iv;
5995 if (cand->pos != IP_NORMAL)
5996 return;
5998 var_after = cand->var_after;
5999 iv_update = SSA_NAME_DEF_STMT (var_after);
6001 bb = gimple_bb (iv_update);
6002 gsi = gsi_last_nondebug_bb (bb);
6003 stmt = gsi_stmt (gsi);
6005 /* Only handle conditional statements for now. */
6006 if (gimple_code (stmt) != GIMPLE_COND)
6007 return;
6009 gsi_prev_nondebug (&gsi);
6010 stmt = gsi_stmt (gsi);
6011 if (stmt != iv_update)
6012 return;
6014 gsi_prev_nondebug (&gsi);
6015 if (gsi_end_p (gsi))
6016 return;
6018 stmt = gsi_stmt (gsi);
6019 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6020 return;
6022 if (stmt != use->stmt)
6023 return;
6025 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6026 return;
6028 if (dump_file && (dump_flags & TDF_DETAILS))
6030 fprintf (dump_file, "Reordering \n");
6031 print_gimple_stmt (dump_file, iv_update, 0, 0);
6032 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6033 fprintf (dump_file, "\n");
6036 gsi = gsi_for_stmt (use->stmt);
6037 gsi_iv = gsi_for_stmt (iv_update);
6038 gsi_move_before (&gsi_iv, &gsi);
6040 cand->pos = IP_BEFORE_USE;
6041 cand->incremented_at = use->stmt;
6044 /* Rewrites USE (address that is an iv) using candidate CAND. */
6046 static void
6047 rewrite_use_address (struct ivopts_data *data,
6048 struct iv_use *use, struct iv_cand *cand)
6050 aff_tree aff;
6051 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6052 tree base_hint = NULL_TREE;
6053 tree ref, iv;
6054 bool ok;
6056 adjust_iv_update_pos (cand, use);
6057 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6058 gcc_assert (ok);
6059 unshare_aff_combination (&aff);
6061 /* To avoid undefined overflow problems, all IV candidates use unsigned
6062 integer types. The drawback is that this makes it impossible for
6063 create_mem_ref to distinguish an IV that is based on a memory object
6064 from one that represents simply an offset.
6066 To work around this problem, we pass a hint to create_mem_ref that
6067 indicates which variable (if any) in aff is an IV based on a memory
6068 object. Note that we only consider the candidate. If this is not
6069 based on an object, the base of the reference is in some subexpression
6070 of the use -- but these will use pointer types, so they are recognized
6071 by the create_mem_ref heuristics anyway. */
6072 if (cand->iv->base_object)
6073 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6075 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6076 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6077 reference_alias_ptr_type (*use->op_p),
6078 iv, base_hint, data->speed);
6079 copy_ref_info (ref, *use->op_p);
6080 *use->op_p = ref;
6083 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6084 candidate CAND. */
6086 static void
6087 rewrite_use_compare (struct ivopts_data *data,
6088 struct iv_use *use, struct iv_cand *cand)
6090 tree comp, *var_p, op, bound;
6091 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6092 enum tree_code compare;
6093 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6094 bool ok;
6096 bound = cp->value;
6097 if (bound)
6099 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6100 tree var_type = TREE_TYPE (var);
6101 gimple_seq stmts;
6103 if (dump_file && (dump_flags & TDF_DETAILS))
6105 fprintf (dump_file, "Replacing exit test: ");
6106 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6108 compare = iv_elimination_compare (data, use);
6109 bound = unshare_expr (fold_convert (var_type, bound));
6110 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6111 if (stmts)
6112 gsi_insert_seq_on_edge_immediate (
6113 loop_preheader_edge (data->current_loop),
6114 stmts);
6116 gimple_cond_set_lhs (use->stmt, var);
6117 gimple_cond_set_code (use->stmt, compare);
6118 gimple_cond_set_rhs (use->stmt, op);
6119 return;
6122 /* The induction variable elimination failed; just express the original
6123 giv. */
6124 comp = get_computation (data->current_loop, use, cand);
6125 gcc_assert (comp != NULL_TREE);
6127 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6128 gcc_assert (ok);
6130 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6131 true, GSI_SAME_STMT);
6134 /* Rewrites USE using candidate CAND. */
6136 static void
6137 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6139 switch (use->type)
6141 case USE_NONLINEAR_EXPR:
6142 rewrite_use_nonlinear_expr (data, use, cand);
6143 break;
6145 case USE_ADDRESS:
6146 rewrite_use_address (data, use, cand);
6147 break;
6149 case USE_COMPARE:
6150 rewrite_use_compare (data, use, cand);
6151 break;
6153 default:
6154 gcc_unreachable ();
6157 update_stmt (use->stmt);
6160 /* Rewrite the uses using the selected induction variables. */
6162 static void
6163 rewrite_uses (struct ivopts_data *data)
6165 unsigned i;
6166 struct iv_cand *cand;
6167 struct iv_use *use;
6169 for (i = 0; i < n_iv_uses (data); i++)
6171 use = iv_use (data, i);
6172 cand = use->selected;
6173 gcc_assert (cand);
6175 rewrite_use (data, use, cand);
6179 /* Removes the ivs that are not used after rewriting. */
6181 static void
6182 remove_unused_ivs (struct ivopts_data *data)
6184 unsigned j;
6185 bitmap_iterator bi;
6186 bitmap toremove = BITMAP_ALLOC (NULL);
6188 /* Figure out an order in which to release SSA DEFs so that we don't
6189 release something that we'd have to propagate into a debug stmt
6190 afterwards. */
6191 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6193 struct version_info *info;
6195 info = ver_info (data, j);
6196 if (info->iv
6197 && !integer_zerop (info->iv->step)
6198 && !info->inv_id
6199 && !info->iv->have_use_for
6200 && !info->preserve_biv)
6201 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6204 release_defs_bitset (toremove);
6206 BITMAP_FREE (toremove);
6209 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6210 for pointer_map_traverse. */
6212 static bool
6213 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6214 void *data ATTRIBUTE_UNUSED)
6216 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6218 free (niter);
6219 return true;
6222 /* Frees data allocated by the optimization of a single loop. */
6224 static void
6225 free_loop_data (struct ivopts_data *data)
6227 unsigned i, j;
6228 bitmap_iterator bi;
6229 tree obj;
6231 if (data->niters)
6233 pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6234 pointer_map_destroy (data->niters);
6235 data->niters = NULL;
6238 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6240 struct version_info *info;
6242 info = ver_info (data, i);
6243 if (info->iv)
6244 free (info->iv);
6245 info->iv = NULL;
6246 info->has_nonlin_use = false;
6247 info->preserve_biv = false;
6248 info->inv_id = 0;
6250 bitmap_clear (data->relevant);
6251 bitmap_clear (data->important_candidates);
6253 for (i = 0; i < n_iv_uses (data); i++)
6255 struct iv_use *use = iv_use (data, i);
6257 free (use->iv);
6258 BITMAP_FREE (use->related_cands);
6259 for (j = 0; j < use->n_map_members; j++)
6260 if (use->cost_map[j].depends_on)
6261 BITMAP_FREE (use->cost_map[j].depends_on);
6262 free (use->cost_map);
6263 free (use);
6265 VEC_truncate (iv_use_p, data->iv_uses, 0);
6267 for (i = 0; i < n_iv_cands (data); i++)
6269 struct iv_cand *cand = iv_cand (data, i);
6271 if (cand->iv)
6272 free (cand->iv);
6273 if (cand->depends_on)
6274 BITMAP_FREE (cand->depends_on);
6275 free (cand);
6277 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
6279 if (data->version_info_size < num_ssa_names)
6281 data->version_info_size = 2 * num_ssa_names;
6282 free (data->version_info);
6283 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6286 data->max_inv_id = 0;
6288 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
6289 SET_DECL_RTL (obj, NULL_RTX);
6291 VEC_truncate (tree, decl_rtl_to_reset, 0);
6293 htab_empty (data->inv_expr_tab);
6294 data->inv_expr_id = 0;
6297 /* Finalizes the data structures used by the iv optimization pass. */
6300 static void
6301 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6303 free_loop_data (data);
6304 free (data->version_info);
6305 BITMAP_FREE (data->relevant);
6306 BITMAP_FREE (data->important_candidates);
6308 VEC_free (tree, heap, decl_rtl_to_reset);
6309 VEC_free (iv_use_p, heap, data->iv_uses);
6310 VEC_free (iv_cand_p, heap, data->iv_candidates);
6311 htab_delete (data->inv_expr_tab);
6314 /* Returns true if the loop body BODY includes any function calls. */
6316 static bool
6317 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6319 gimple_stmt_iterator gsi;
6320 unsigned i;
6322 for (i = 0; i < num_nodes; i++)
6323 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6325 gimple stmt = gsi_stmt (gsi);
6326 if (is_gimple_call (stmt)
6327 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6328 return true;
6330 return false;
6333 /* Optimizes the LOOP. Returns true if anything changed. */
6335 static bool
6336 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6338 bool changed = false;
6339 struct iv_ca *iv_ca;
6340 edge exit;
6341 basic_block *body;
6343 gcc_assert (!data->niters);
6344 data->current_loop = loop;
6345 data->speed = optimize_loop_for_speed_p (loop);
6347 if (dump_file && (dump_flags & TDF_DETAILS))
6349 fprintf (dump_file, "Processing loop %d\n", loop->num);
6351 exit = single_dom_exit (loop);
6352 if (exit)
6354 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6355 exit->src->index, exit->dest->index);
6356 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6357 fprintf (dump_file, "\n");
6360 fprintf (dump_file, "\n");
6363 body = get_loop_body (loop);
6364 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6365 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6366 free (body);
6368 /* For each ssa name, determine whether it behaves as an induction
6369 variable in some loop. */
6370 if (!find_induction_variables (data))
6371 goto finish;
6373 /* Finds interesting uses (item 1). */
6374 find_interesting_uses (data);
6375 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6376 goto finish;
6378 /* Finds candidates for the induction variables (item 2). */
6379 find_iv_candidates (data);
6381 /* Calculates the costs (item 3, part 1). */
6382 determine_iv_costs (data);
6383 determine_use_iv_costs (data);
6384 determine_set_costs (data);
6386 /* Find the optimal set of induction variables (item 3, part 2). */
6387 iv_ca = find_optimal_iv_set (data);
6388 if (!iv_ca)
6389 goto finish;
6390 changed = true;
6392 /* Create the new induction variables (item 4, part 1). */
6393 create_new_ivs (data, iv_ca);
6394 iv_ca_free (&iv_ca);
6396 /* Rewrite the uses (item 4, part 2). */
6397 rewrite_uses (data);
6399 /* Remove the ivs that are unused after rewriting. */
6400 remove_unused_ivs (data);
6402 /* We have changed the structure of induction variables; it might happen
6403 that definitions in the scev database refer to some of them that were
6404 eliminated. */
6405 scev_reset ();
6407 finish:
6408 free_loop_data (data);
6410 return changed;
6413 /* Main entry point. Optimizes induction variables in loops. */
6415 void
6416 tree_ssa_iv_optimize (void)
6418 struct loop *loop;
6419 struct ivopts_data data;
6420 loop_iterator li;
6422 tree_ssa_iv_optimize_init (&data);
6424 /* Optimize the loops starting with the innermost ones. */
6425 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6427 if (dump_file && (dump_flags & TDF_DETAILS))
6428 flow_loop_dump (loop, dump_file, NULL, 1);
6430 tree_ssa_iv_optimize_loop (&data, loop);
6433 tree_ssa_iv_optimize_finalize (&data);