Clean up and rename beginnings of a testsuite
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blob c940d413de8c254a0be1a96848646adb10b011a7
/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
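/* As a hedged illustration of the overall effect (an example sketch, not
   code taken from this pass): for a source loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   strength reduction may replace the multiply implied by the array
   indexing with a pointer bumped by the element size, and induction
   variable elimination may then rewrite the exit test against that
   pointer, roughly as if the source had been

     for (p = a; p < a + n; p++)
       *p = 0;

   so that the original counter I can be removed entirely.  */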
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "output.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
/* The infinite cost.  */
#define INFTY 10000000

/* The fallback estimate of the number of loop iterations, used when no
   better estimate is available.  */
#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_loop_iterations_int (loop, false);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
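/* For illustration (an assumption about how these costs are meant to be
   read, not a definition from this file): a use that needs an extra add
   to express might get { .cost = 4, .complexity = 1 }, while one directly
   expressible by an addressing mode could get { .cost = 4, .complexity = 0 };
   when runtime costs tie, the lower complexity is the better choice.  */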
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
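/* A sketch (assumed shapes, for illustration only) of where the increment
   of a candidate lands for the two most common positions:

     loop:
       ...body...
       i += step;            <- IP_NORMAL: just before the exit test
       if (cond) goto loop;

     loop:
       ...body...
       if (cond) goto latch;
       ...
     latch:
       i += step;            <- IP_END: at the end of the latch block
       goto loop;
*/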
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit,
		struct tree_niter_desc **desc_p)
{
  struct tree_niter_desc* desc = NULL;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must know it
	 unconditionally (i.e., without possibility of # of iterations
	 being zero).  Also, we cannot safely work with ssa names that
	 appear in phi nodes on abnormal edges, so that we do not create
	 overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (number_of_iterations_exit (data->current_loop,
				     exit, desc, true)
	  && integer_zerop (desc->may_be_zero)
	  && !contains_abnormal_ssa_name_p (desc->niter))
	niter = desc->niter;
      else
	niter = NULL_TREE;

      desc->niter = niter;
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    niter = ((struct tree_niter_desc *) *slot)->niter;

  if (desc_p)
    *desc_p = (struct tree_niter_desc *) *slot;
  return niter;
}

/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit, NULL);
}
/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *)ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *)ent2;

  return operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *)ent;
  return expr->hash;
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
                                    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
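/* For example (an illustrative sketch, not taken from the sources): for
   EXPR = &a[i], the ADDR_EXPR case strips the indexing and yields the
   address of A itself; for EXPR = p + 4 (a POINTER_PLUS_EXPR), the
   recursion follows the pointer operand and yields whatever P is based
   on; and a constant pointer (INTEGER_CST) yields NULL_TREE, meaning no
   base object could be determined.  */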
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = fold_convert (sizetype, step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter, TDF_SLIM);
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
	return false;
    }

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */
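/* For instance (an illustrative sketch): with TOP = (i + 2) * 4 and
   BOT = i + 2, the MULT_EXPR case recurses on the first operand, the
   operands compare equal there, and *MUL ends up as 4.  With constant
   operands TOP = 12 and BOT = 4, the INTEGER_CST case divides and yields
   *MUL = 3, failing instead if the division leaves a remainder.  */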
static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
			      precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
	return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
			      precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}
/* Return true if EXPR may be non-addressable.  */

static bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
	find_interesting_uses_op (data, def);
    }
}
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
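/* For example (a sketch, not from the sources): stripping EXPR = p + 16
   returns P with *OFFSET = 16, and stripping EXPR = &a[4] for an array
   of 4-byte elements strips the array reference completely, returning
   the address of A with *OFFSET = 16.  */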
/* Returns a variant of TYPE that can be used as a base for different uses.
   We return an unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to which we should store it.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  The important field is set to IMPORTANT
   and the position to POS.  If USE is not NULL, the candidate is set as
   related to it.  If both BASE and STEP are NULL, we add a pseudocandidate
   for the replacement of the final value of the iv by a direct computation
   of the value.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (base))
	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
	break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
2287 /* Returns true if incrementing the induction variable at the end of the LOOP
2288 is allowed.
2290 The purpose is to avoid splitting the latch edge with a biv increment, thus
2291 creating a jump, possibly confusing other optimization passes and leaving
2292 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2293 is not available (so we do not have a better alternative), or if the latch
2294 edge is already nonempty. */
2296 static bool
2297 allow_ip_end_pos_p (struct loop *loop)
2299 if (!ip_normal_pos (loop))
2300 return true;
2302 if (!empty_block_p (ip_end_pos (loop)))
2303 return true;
2305 return false;
2308 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2309 The important field is set to IMPORTANT. */
2311 static void
2312 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2313 bool important, struct iv_use *use)
2315 basic_block use_bb = gimple_bb (use->stmt);
2316 enum machine_mode mem_mode;
2317 unsigned HOST_WIDE_INT cstepi;
2319 /* If we insert the increment in any position other than the standard
2320 ones, we must ensure that it is incremented once per iteration.
2321 It must not be in an inner nested loop, or one side of an if
2322 statement. */
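/* Illustrative example: for a loop like "while (n--) *p++ = 0;" on a
target with post-increment addressing, the store through P is such a
use, and the IP_AFTER_USE candidate added below allows the pointer
update to be folded into the memory access. */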
2323 if (use_bb->loop_father != data->current_loop
2324 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2325 || stmt_could_throw_p (use->stmt)
2326 || !cst_and_fits_in_hwi (step))
2327 return;
2329 cstepi = int_cst_value (step);
2331 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2332 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2333 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2335 enum tree_code code = MINUS_EXPR;
2336 tree new_base;
2337 tree new_step = step;
2339 if (POINTER_TYPE_P (TREE_TYPE (base)))
2341 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2342 code = POINTER_PLUS_EXPR;
2344 else
2345 new_step = fold_convert (TREE_TYPE (base), new_step);
2346 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2347 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2348 use->stmt);
2350 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2351 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2353 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2354 use->stmt);
2358 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT.
2359 If USE is not NULL, the candidate is set as related to it. The candidate
2360 computation is scheduled on all available positions. */
2362 static void
2363 add_candidate (struct ivopts_data *data,
2364 tree base, tree step, bool important, struct iv_use *use)
2366 if (ip_normal_pos (data->current_loop))
2367 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2368 if (ip_end_pos (data->current_loop)
2369 && allow_ip_end_pos_p (data->current_loop))
2370 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2372 if (use != NULL && use->type == USE_ADDRESS)
2373 add_autoinc_candidates (data, base, step, important, use);
2376 /* Add a standard "0 + 1 * iteration" iv candidate for a
2377 type with SIZE bits. */
2379 static void
2380 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2381 unsigned int size)
2383 tree type = lang_hooks.types.type_for_size (size, true);
2384 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2385 true, NULL);
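/* E.g., with a 32-bit "int" this adds the candidate {base = 0, step = 1}
in the 32-bit unsigned type -- the plain counter that many uses can
share. */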
2388 /* Adds standard iv candidates. */
2390 static void
2391 add_standard_iv_candidates (struct ivopts_data *data)
2393 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2395 /* The same for a double-integer type if it is still fast enough. */
2396 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2397 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
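/* So on a target with 64-bit words and a 32-bit "int", both 32-bit and
64-bit standard counters become candidates. */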
2401 /* Adds candidates based on the old induction variable IV. */
2403 static void
2404 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2406 gimple phi;
2407 tree def;
2408 struct iv_cand *cand;
2410 add_candidate (data, iv->base, iv->step, true, NULL);
2412 /* The same, but with initial value zero. */
2413 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2414 add_candidate (data, size_int (0), iv->step, true, NULL);
2415 else
2416 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2417 iv->step, true, NULL);
2419 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2420 if (gimple_code (phi) == GIMPLE_PHI)
2422 /* Additionally record the possibility of leaving the original iv
2423 untouched. */
2424 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2425 cand = add_candidate_1 (data,
2426 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2427 SSA_NAME_DEF_STMT (def));
2428 cand->var_before = iv->ssa_name;
2429 cand->var_after = def;
2433 /* Adds candidates based on the old induction variables. */
2435 static void
2436 add_old_ivs_candidates (struct ivopts_data *data)
2438 unsigned i;
2439 struct iv *iv;
2440 bitmap_iterator bi;
2442 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2444 iv = ver_info (data, i)->iv;
2445 if (iv && iv->biv_p && !integer_zerop (iv->step))
2446 add_old_iv_candidates (data, iv);
2450 /* Adds candidates based on the value of the induction variable IV and USE. */
2452 static void
2453 add_iv_value_candidates (struct ivopts_data *data,
2454 struct iv *iv, struct iv_use *use)
2456 unsigned HOST_WIDE_INT offset;
2457 tree base;
2458 tree basetype;
2460 add_candidate (data, iv->base, iv->step, false, use);
2462 /* The same, but with initial value zero. Make such a variable important,
2463 since it is generic enough that possibly many uses may be based
2464 on it. */
2465 basetype = TREE_TYPE (iv->base);
2466 if (POINTER_TYPE_P (basetype))
2467 basetype = sizetype;
2468 add_candidate (data, build_int_cst (basetype, 0),
2469 iv->step, true, use);
2471 /* Third, try removing the constant offset. Make sure to even
2472 add a candidate for &a[0] vs. (T *)&a. */
2473 base = strip_offset (iv->base, &offset);
2474 if (offset
2475 || base != iv->base)
2476 add_candidate (data, base, iv->step, false, use);
2479 /* Adds candidates based on the uses. */
2481 static void
2482 add_derived_ivs_candidates (struct ivopts_data *data)
2484 unsigned i;
2486 for (i = 0; i < n_iv_uses (data); i++)
2488 struct iv_use *use = iv_use (data, i);
2490 if (!use)
2491 continue;
2493 switch (use->type)
2495 case USE_NONLINEAR_EXPR:
2496 case USE_COMPARE:
2497 case USE_ADDRESS:
2498 /* Just add the ivs based on the value of the iv used here. */
2499 add_iv_value_candidates (data, use->iv, use);
2500 break;
2502 default:
2503 gcc_unreachable ();
2508 /* Record important candidates and add them to related_cands bitmaps
2509 if needed. */
2511 static void
2512 record_important_candidates (struct ivopts_data *data)
2514 unsigned i;
2515 struct iv_use *use;
2517 for (i = 0; i < n_iv_cands (data); i++)
2519 struct iv_cand *cand = iv_cand (data, i);
2521 if (cand->important)
2522 bitmap_set_bit (data->important_candidates, i);
2525 data->consider_all_candidates = (n_iv_cands (data)
2526 <= CONSIDER_ALL_CANDIDATES_BOUND);
2528 if (data->consider_all_candidates)
2530 /* We will not need "related_cands" bitmaps in this case,
2531 so release them to decrease peak memory consumption. */
2532 for (i = 0; i < n_iv_uses (data); i++)
2534 use = iv_use (data, i);
2535 BITMAP_FREE (use->related_cands);
2538 else
2540 /* Add important candidates to the related_cands bitmaps. */
2541 for (i = 0; i < n_iv_uses (data); i++)
2542 bitmap_ior_into (iv_use (data, i)->related_cands,
2543 data->important_candidates);
2547 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2548 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2549 we allocate a simple list for every use. */
2551 static void
2552 alloc_use_cost_map (struct ivopts_data *data)
2554 unsigned i, size, s, j;
2556 for (i = 0; i < n_iv_uses (data); i++)
2558 struct iv_use *use = iv_use (data, i);
2559 bitmap_iterator bi;
2561 if (data->consider_all_candidates)
2562 size = n_iv_cands (data);
2563 else
2565 s = 0;
2566 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2568 s++;
2571 /* Round up to a power of two, so that taking the modulo by it is fast. */
2572 for (size = 1; size < s; size <<= 1)
2573 continue;
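/* E.g., five related candidates give size == 8, so the lookups in
set_use_iv_cost and get_use_iv_cost can compute "id & (8 - 1)"
instead of a division-based "id % 8". */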
2576 use->n_map_members = size;
2577 use->cost_map = XCNEWVEC (struct cost_pair, size);
2581 /* Returns a description of the computation cost of an expression whose
2582 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2584 static comp_cost
2585 new_cost (unsigned runtime, unsigned complexity)
2587 comp_cost cost;
2589 cost.cost = runtime;
2590 cost.complexity = complexity;
2592 return cost;
2595 /* Adds costs COST1 and COST2. */
2597 static comp_cost
2598 add_costs (comp_cost cost1, comp_cost cost2)
2600 cost1.cost += cost2.cost;
2601 cost1.complexity += cost2.complexity;
2603 return cost1;
2605 /* Subtracts cost COST2 from COST1. */
2607 static comp_cost
2608 sub_costs (comp_cost cost1, comp_cost cost2)
2610 cost1.cost -= cost2.cost;
2611 cost1.complexity -= cost2.complexity;
2613 return cost1;
2616 /* Returns a negative number if COST1 < COST2, a positive number if
2617 COST1 > COST2, and 0 if COST1 = COST2. */
2619 static int
2620 compare_costs (comp_cost cost1, comp_cost cost2)
2622 if (cost1.cost == cost2.cost)
2623 return cost1.complexity - cost2.complexity;
2625 return cost1.cost - cost2.cost;
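/* E.g., costs {4, 1} and {4, 2} have the same runtime cost, so the
first wins on lower complexity (fewer parts in the addressing
expression). */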
2628 /* Returns true if COST is infinite. */
2630 static bool
2631 infinite_cost_p (comp_cost cost)
2633 return cost.cost == INFTY;
2636 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2637 depends on invariants DEPENDS_ON and that the value used in expressing it
2638 is VALUE. */
2640 static void
2641 set_use_iv_cost (struct ivopts_data *data,
2642 struct iv_use *use, struct iv_cand *cand,
2643 comp_cost cost, bitmap depends_on, tree value,
2644 int inv_expr_id)
2646 unsigned i, s;
2648 if (infinite_cost_p (cost))
2650 BITMAP_FREE (depends_on);
2651 return;
2654 if (data->consider_all_candidates)
2656 use->cost_map[cand->id].cand = cand;
2657 use->cost_map[cand->id].cost = cost;
2658 use->cost_map[cand->id].depends_on = depends_on;
2659 use->cost_map[cand->id].value = value;
2660 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2661 return;
2664 /* n_map_members is a power of two, so this computes modulo. */
2665 s = cand->id & (use->n_map_members - 1);
2666 for (i = s; i < use->n_map_members; i++)
2667 if (!use->cost_map[i].cand)
2668 goto found;
2669 for (i = 0; i < s; i++)
2670 if (!use->cost_map[i].cand)
2671 goto found;
2673 gcc_unreachable ();
2675 found:
2676 use->cost_map[i].cand = cand;
2677 use->cost_map[i].cost = cost;
2678 use->cost_map[i].depends_on = depends_on;
2679 use->cost_map[i].value = value;
2680 use->cost_map[i].inv_expr_id = inv_expr_id;
2683 /* Gets cost of (USE, CANDIDATE) pair. */
2685 static struct cost_pair *
2686 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2687 struct iv_cand *cand)
2689 unsigned i, s;
2690 struct cost_pair *ret;
2692 if (!cand)
2693 return NULL;
2695 if (data->consider_all_candidates)
2697 ret = use->cost_map + cand->id;
2698 if (!ret->cand)
2699 return NULL;
2701 return ret;
2704 /* n_map_members is a power of two, so this computes modulo. */
2705 s = cand->id & (use->n_map_members - 1);
2706 for (i = s; i < use->n_map_members; i++)
2707 if (use->cost_map[i].cand == cand)
2708 return use->cost_map + i;
2710 for (i = 0; i < s; i++)
2711 if (use->cost_map[i].cand == cand)
2712 return use->cost_map + i;
2714 return NULL;
2717 /* Returns estimate on cost of computing SEQ. */
2719 static unsigned
2720 seq_cost (rtx seq, bool speed)
2722 unsigned cost = 0;
2723 rtx set;
2725 for (; seq; seq = NEXT_INSN (seq))
2727 set = single_set (seq);
2728 if (set)
2729 cost += rtx_cost (set, SET, speed);
2730 else
2731 cost++;
2734 return cost;
2737 /* Produces DECL_RTL for object OBJ so that it looks like it is stored in memory. */
2738 static rtx
2739 produce_memory_decl_rtl (tree obj, int *regno)
2741 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2742 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2743 rtx x;
2745 gcc_assert (obj);
2746 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2748 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2749 x = gen_rtx_SYMBOL_REF (address_mode, name);
2750 SET_SYMBOL_REF_DECL (x, obj);
2751 x = gen_rtx_MEM (DECL_MODE (obj), x);
2752 set_mem_addr_space (x, as);
2753 targetm.encode_section_info (obj, x, true);
2755 else
2757 x = gen_raw_REG (address_mode, (*regno)++);
2758 x = gen_rtx_MEM (DECL_MODE (obj), x);
2759 set_mem_addr_space (x, as);
2762 return x;
2765 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2766 walk_tree. DATA contains the actual fake register number. */
2768 static tree
2769 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2771 tree obj = NULL_TREE;
2772 rtx x = NULL_RTX;
2773 int *regno = (int *) data;
2775 switch (TREE_CODE (*expr_p))
2777 case ADDR_EXPR:
2778 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2779 handled_component_p (*expr_p);
2780 expr_p = &TREE_OPERAND (*expr_p, 0))
2781 continue;
2782 obj = *expr_p;
2783 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2784 x = produce_memory_decl_rtl (obj, regno);
2785 break;
2787 case SSA_NAME:
2788 *ws = 0;
2789 obj = SSA_NAME_VAR (*expr_p);
2790 if (!DECL_RTL_SET_P (obj))
2791 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2792 break;
2794 case VAR_DECL:
2795 case PARM_DECL:
2796 case RESULT_DECL:
2797 *ws = 0;
2798 obj = *expr_p;
2800 if (DECL_RTL_SET_P (obj))
2801 break;
2803 if (DECL_MODE (obj) == BLKmode)
2804 x = produce_memory_decl_rtl (obj, regno);
2805 else
2806 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2808 break;
2810 default:
2811 break;
2814 if (x)
2816 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2817 SET_DECL_RTL (obj, x);
2820 return NULL_TREE;
2823 /* Determines cost of the computation of EXPR. */
2825 static unsigned
2826 computation_cost (tree expr, bool speed)
2828 rtx seq, rslt;
2829 tree type = TREE_TYPE (expr);
2830 unsigned cost;
2831 /* Avoid using hard regs in ways which may be unsupported. */
2832 int regno = LAST_VIRTUAL_REGISTER + 1;
2833 struct cgraph_node *node = cgraph_node (current_function_decl);
2834 enum node_frequency real_frequency = node->frequency;
2836 node->frequency = NODE_FREQUENCY_NORMAL;
2837 crtl->maybe_hot_insn_p = speed;
2838 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2839 start_sequence ();
2840 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2841 seq = get_insns ();
2842 end_sequence ();
2843 default_rtl_profile ();
2844 node->frequency = real_frequency;
2846 cost = seq_cost (seq, speed);
2847 if (MEM_P (rslt))
2848 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2849 TYPE_ADDR_SPACE (type), speed);
2851 return cost;
2854 /* Returns variable containing the value of candidate CAND at statement AT. */
2856 static tree
2857 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2859 if (stmt_after_increment (loop, cand, stmt))
2860 return cand->var_after;
2861 else
2862 return cand->var_before;
2865 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2866 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2868 unsigned
2869 tree_int_cst_sign_bit (const_tree t)
2871 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2872 unsigned HOST_WIDE_INT w;
2874 if (bitno < HOST_BITS_PER_WIDE_INT)
2875 w = TREE_INT_CST_LOW (t);
2876 else
2878 w = TREE_INT_CST_HIGH (t);
2879 bitno -= HOST_BITS_PER_WIDE_INT;
2882 return (w >> bitno) & 1;
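/* Worked example: for an 8-bit type holding the value 0x80, bitno == 7,
w == 0x80, and (w >> 7) & 1 == 1 -- the sign bit is set. */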
2885 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2886 same precision that is at least as wide as the precision of TYPE, stores
2887 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2888 type of A and B. */
2890 static tree
2891 determine_common_wider_type (tree *a, tree *b)
2893 tree wider_type = NULL;
2894 tree suba, subb;
2895 tree atype = TREE_TYPE (*a);
2897 if (CONVERT_EXPR_P (*a))
2899 suba = TREE_OPERAND (*a, 0);
2900 wider_type = TREE_TYPE (suba);
2901 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2902 return atype;
2904 else
2905 return atype;
2907 if (CONVERT_EXPR_P (*b))
2909 subb = TREE_OPERAND (*b, 0);
2910 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2911 return atype;
2913 else
2914 return atype;
2916 *a = suba;
2917 *b = subb;
2918 return wider_type;
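/* Illustrative case: if *A is "(int) x" and *B is "(int) y" with x and
y of the same 64-bit type, x and y are stored back to *A and *B and
the 64-bit type is returned, so a difference computed by the caller
folds in the wider type. */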
2921 /* Determines the expression by which USE is expressed from induction variable
2922 CAND at statement AT in LOOP. The expression is stored in a decomposed
2923 form into AFF. Returns false if USE cannot be expressed using CAND. */
2925 static bool
2926 get_computation_aff (struct loop *loop,
2927 struct iv_use *use, struct iv_cand *cand, gimple at,
2928 struct affine_tree_combination *aff)
2930 tree ubase = use->iv->base;
2931 tree ustep = use->iv->step;
2932 tree cbase = cand->iv->base;
2933 tree cstep = cand->iv->step, cstep_common;
2934 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2935 tree common_type, var;
2936 tree uutype;
2937 aff_tree cbase_aff, var_aff;
2938 double_int rat;
2940 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2942 /* We do not have a precision to express the values of use. */
2943 return false;
2946 var = var_at_stmt (loop, cand, at);
2947 uutype = unsigned_type_for (utype);
2949 /* If the conversion is not noop, perform it. */
2950 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2952 cstep = fold_convert (uutype, cstep);
2953 cbase = fold_convert (uutype, cbase);
2954 var = fold_convert (uutype, var);
2957 if (!constant_multiple_of (ustep, cstep, &rat))
2958 return false;
2960 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2961 type, we achieve better folding by computing their difference in this
2962 wider type, and casting the result to UUTYPE. We do not need to worry
2963 about overflows, as all the arithmetic will in the end be performed in
2964 UUTYPE anyway. */
2965 common_type = determine_common_wider_type (&ubase, &cbase);
2967 /* use = ubase - ratio * cbase + ratio * var. */
2968 tree_to_aff_combination (ubase, common_type, aff);
2969 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2970 tree_to_aff_combination (var, uutype, &var_aff);
2972 /* We need to shift the value if we are after the increment. */
2973 if (stmt_after_increment (loop, cand, at))
2975 aff_tree cstep_aff;
2977 if (common_type != uutype)
2978 cstep_common = fold_convert (common_type, cstep);
2979 else
2980 cstep_common = cstep;
2982 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2983 aff_combination_add (&cbase_aff, &cstep_aff);
2986 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2987 aff_combination_add (aff, &cbase_aff);
2988 if (common_type != uutype)
2989 aff_combination_convert (aff, uutype);
2991 aff_combination_scale (&var_aff, rat);
2992 aff_combination_add (aff, &var_aff);
2994 return true;
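/* Worked example: expressing a use with iv {base 0, step 4} from a
candidate with iv {base 0, step 1} gives rat == 4, and the affine
result is 0 - 4 * 0 + 4 * var, i.e. simply 4 * var. */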
2997 /* Determines the expression by which USE is expressed from induction variable
2998 CAND at statement AT in LOOP. The computation is unshared. */
3000 static tree
3001 get_computation_at (struct loop *loop,
3002 struct iv_use *use, struct iv_cand *cand, gimple at)
3004 aff_tree aff;
3005 tree type = TREE_TYPE (use->iv->base);
3007 if (!get_computation_aff (loop, use, cand, at, &aff))
3008 return NULL_TREE;
3009 unshare_aff_combination (&aff);
3010 return fold_convert (type, aff_combination_to_tree (&aff));
3013 /* Determines the expression by which USE is expressed from induction variable
3014 CAND in LOOP. The computation is unshared. */
3016 static tree
3017 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3019 return get_computation_at (loop, use, cand, use->stmt);
3022 /* Adjust the cost COST for being in loop setup rather than loop body.
3023 If we're optimizing for space, the loop setup overhead is constant;
3024 if we're optimizing for speed, amortize it over the per-iteration cost. */
3025 static unsigned
3026 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3028 if (cost == INFTY)
3029 return cost;
3030 else if (optimize_loop_for_speed_p (data->current_loop))
3031 return cost / avg_loop_niter (data->current_loop);
3032 else
3033 return cost;
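/* E.g., a setup cost of 10 in a loop that averages 5 iterations is
charged as 10 / 5 == 2 when optimizing for speed, but stays 10 when
optimizing for size. */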
3036 /* Returns cost of addition in MODE. */
3038 static unsigned
3039 add_cost (enum machine_mode mode, bool speed)
3041 static unsigned costs[NUM_MACHINE_MODES];
3042 rtx seq;
3043 unsigned cost;
3045 if (costs[mode])
3046 return costs[mode];
3048 start_sequence ();
3049 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3050 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3051 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3052 NULL_RTX);
3053 seq = get_insns ();
3054 end_sequence ();
3056 cost = seq_cost (seq, speed);
3057 if (!cost)
3058 cost = 1;
3060 costs[mode] = cost;
3062 if (dump_file && (dump_flags & TDF_DETAILS))
3063 fprintf (dump_file, "Addition in %s costs %d\n",
3064 GET_MODE_NAME (mode), cost);
3065 return cost;
3068 /* Entry in a hashtable of already known costs for multiplication. */
3069 struct mbc_entry
3071 HOST_WIDE_INT cst; /* The constant to multiply by. */
3072 enum machine_mode mode; /* In mode. */
3073 unsigned cost; /* The cost. */
3076 /* Computes the hash value for ENTRY. */
3078 static hashval_t
3079 mbc_entry_hash (const void *entry)
3081 const struct mbc_entry *e = (const struct mbc_entry *) entry;
3083 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3086 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3088 static int
3089 mbc_entry_eq (const void *entry1, const void *entry2)
3091 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3092 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3094 return (e1->mode == e2->mode
3095 && e1->cst == e2->cst);
3098 /* Returns cost of multiplication by constant CST in MODE. */
3100 unsigned
3101 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3103 static htab_t costs;
3104 struct mbc_entry **cached, act;
3105 rtx seq;
3106 unsigned cost;
3108 if (!costs)
3109 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3111 act.mode = mode;
3112 act.cst = cst;
3113 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3114 if (*cached)
3115 return (*cached)->cost;
3117 *cached = XNEW (struct mbc_entry);
3118 (*cached)->mode = mode;
3119 (*cached)->cst = cst;
3121 start_sequence ();
3122 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3123 gen_int_mode (cst, mode), NULL_RTX, 0);
3124 seq = get_insns ();
3125 end_sequence ();
3127 cost = seq_cost (seq, speed);
3129 if (dump_file && (dump_flags & TDF_DETAILS))
3130 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3131 (int) cst, GET_MODE_NAME (mode), cost);
3133 (*cached)->cost = cost;
3135 return cost;
3138 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3139 validity for a memory reference accessing memory of mode MODE in
3140 address space AS. */
3142 DEF_VEC_P (sbitmap);
3143 DEF_VEC_ALLOC_P (sbitmap, heap);
3145 bool
3146 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3147 addr_space_t as)
3149 #define MAX_RATIO 128
3150 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3151 static VEC (sbitmap, heap) *valid_mult_list;
3152 sbitmap valid_mult;
3154 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3155 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3157 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3158 if (!valid_mult)
3160 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3161 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3162 rtx addr;
3163 HOST_WIDE_INT i;
3165 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3166 sbitmap_zero (valid_mult);
3167 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3168 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3170 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3171 if (memory_address_addr_space_p (mode, addr, as))
3172 SET_BIT (valid_mult, i + MAX_RATIO);
3175 if (dump_file && (dump_flags & TDF_DETAILS))
3177 fprintf (dump_file, " allowed multipliers:");
3178 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3179 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3180 fprintf (dump_file, " %d", (int) i);
3181 fprintf (dump_file, "\n");
3182 fprintf (dump_file, "\n");
3185 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3188 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3189 return false;
3191 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
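/* On a target like x86, for example, this typically accepts the scale
factors 1, 2, 4 and 8 and rejects other ratios; the answer comes
purely from probing memory_address_addr_space_p above. */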
3194 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3195 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3196 variable is omitted. Compute the cost for a memory reference that accesses
3197 a memory location of mode MEM_MODE in address space AS.
3199 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3200 size of MEM_MODE / RATIO) is available. To make this determination, we
3201 look at the size of the increment to be made, which is given in CSTEP.
3202 CSTEP may be zero if the step is unknown.
3203 STMT_AFTER_INC is true iff the statement we're looking at is after the
3204 increment of the original biv.
3206 TODO -- there must be some better way. This all is quite crude. */
3208 typedef struct
3210 HOST_WIDE_INT min_offset, max_offset;
3211 unsigned costs[2][2][2][2];
3212 } *address_cost_data;
3214 DEF_VEC_P (address_cost_data);
3215 DEF_VEC_ALLOC_P (address_cost_data, heap);
3217 static comp_cost
3218 get_address_cost (bool symbol_present, bool var_present,
3219 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3220 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3221 addr_space_t as, bool speed,
3222 bool stmt_after_inc, bool *may_autoinc)
3224 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3225 static VEC(address_cost_data, heap) *address_cost_data_list;
3226 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3227 address_cost_data data;
3228 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3229 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3230 unsigned cost, acost, complexity;
3231 bool offset_p, ratio_p, autoinc;
3232 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3233 unsigned HOST_WIDE_INT mask;
3234 unsigned bits;
3236 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3237 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3238 data_index + 1);
3240 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3241 if (!data)
3243 HOST_WIDE_INT i;
3244 HOST_WIDE_INT rat, off = 0;
3245 int old_cse_not_expected, width;
3246 unsigned sym_p, var_p, off_p, rat_p, add_c;
3247 rtx seq, addr, base;
3248 rtx reg0, reg1;
3250 data = (address_cost_data) xcalloc (1, sizeof (*data));
3252 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3254 width = GET_MODE_BITSIZE (address_mode) - 1;
3255 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3256 width = HOST_BITS_PER_WIDE_INT - 1;
3257 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3259 for (i = width; i >= 0; i--)
3261 off = -((HOST_WIDE_INT) 1 << i);
3262 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3263 if (memory_address_addr_space_p (mem_mode, addr, as))
3264 break;
3266 data->min_offset = (i == -1? 0 : off);
3268 for (i = width; i >= 0; i--)
3270 off = ((HOST_WIDE_INT) 1 << i) - 1;
3271 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3272 if (memory_address_addr_space_p (mem_mode, addr, as))
3273 break;
3275 if (i == -1)
3276 off = 0;
3277 data->max_offset = off;
3279 if (dump_file && (dump_flags & TDF_DETAILS))
3281 fprintf (dump_file, "get_address_cost:\n");
3282 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3283 GET_MODE_NAME (mem_mode),
3284 data->min_offset);
3285 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3286 GET_MODE_NAME (mem_mode),
3287 data->max_offset);
3290 rat = 1;
3291 for (i = 2; i <= MAX_RATIO; i++)
3292 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3294 rat = i;
3295 break;
3298 /* Compute the cost of various addressing modes. */
3299 acost = 0;
3300 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3301 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3303 if (HAVE_PRE_DECREMENT)
3305 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3306 has_predec[mem_mode]
3307 = memory_address_addr_space_p (mem_mode, addr, as);
3309 if (HAVE_POST_DECREMENT)
3311 addr = gen_rtx_POST_DEC (address_mode, reg0);
3312 has_postdec[mem_mode]
3313 = memory_address_addr_space_p (mem_mode, addr, as);
3315 if (HAVE_PRE_INCREMENT)
3317 addr = gen_rtx_PRE_INC (address_mode, reg0);
3318 has_preinc[mem_mode]
3319 = memory_address_addr_space_p (mem_mode, addr, as);
3321 if (HAVE_POST_INCREMENT)
3323 addr = gen_rtx_POST_INC (address_mode, reg0);
3324 has_postinc[mem_mode]
3325 = memory_address_addr_space_p (mem_mode, addr, as);
3327 for (i = 0; i < 16; i++)
3329 sym_p = i & 1;
3330 var_p = (i >> 1) & 1;
3331 off_p = (i >> 2) & 1;
3332 rat_p = (i >> 3) & 1;
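/* E.g., i == 13 (binary 1101) probes the combination
sym + cst + rat * index, with no separate var part. */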
3334 addr = reg0;
3335 if (rat_p)
3336 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3337 gen_int_mode (rat, address_mode));
3339 if (var_p)
3340 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3342 if (sym_p)
3344 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3345 /* ??? We can run into trouble with some backends by presenting
3346 it with symbols which haven't been properly passed through
3347 targetm.encode_section_info. By setting the local bit, we
3348 enhance the probability of things working. */
3349 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3351 if (off_p)
3352 base = gen_rtx_fmt_e (CONST, address_mode,
3353 gen_rtx_fmt_ee
3354 (PLUS, address_mode, base,
3355 gen_int_mode (off, address_mode)));
3357 else if (off_p)
3358 base = gen_int_mode (off, address_mode);
3359 else
3360 base = NULL_RTX;
3362 if (base)
3363 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3365 start_sequence ();
3366 /* To avoid splitting addressing modes, pretend that no cse will
3367 follow. */
3368 old_cse_not_expected = cse_not_expected;
3369 cse_not_expected = true;
3370 addr = memory_address_addr_space (mem_mode, addr, as);
3371 cse_not_expected = old_cse_not_expected;
3372 seq = get_insns ();
3373 end_sequence ();
3375 acost = seq_cost (seq, speed);
3376 acost += address_cost (addr, mem_mode, as, speed);
3378 if (!acost)
3379 acost = 1;
3380 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3383 /* On some targets, it is quite expensive to load symbol to a register,
3384 which makes addresses that contain symbols look much more expensive.
3385 However, the symbol will have to be loaded in any case before the
3386 loop (and quite likely we have it in register already), so it does not
3387 make much sense to penalize them too heavily. So make some final
3388 tweaks for the SYMBOL_PRESENT modes:
3390 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3391 var is cheaper, use this mode with small penalty.
3392 If VAR_PRESENT is true, try whether the mode with
3393 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3394 if this is the case, use it. */
3395 add_c = add_cost (address_mode, speed);
3396 for (i = 0; i < 8; i++)
3398 var_p = i & 1;
3399 off_p = (i >> 1) & 1;
3400 rat_p = (i >> 2) & 1;
3402 acost = data->costs[0][1][off_p][rat_p] + 1;
3403 if (var_p)
3404 acost += add_c;
3406 if (acost < data->costs[1][var_p][off_p][rat_p])
3407 data->costs[1][var_p][off_p][rat_p] = acost;
3410 if (dump_file && (dump_flags & TDF_DETAILS))
3412 fprintf (dump_file, "Address costs:\n");
3414 for (i = 0; i < 16; i++)
3416 sym_p = i & 1;
3417 var_p = (i >> 1) & 1;
3418 off_p = (i >> 2) & 1;
3419 rat_p = (i >> 3) & 1;
3421 fprintf (dump_file, " ");
3422 if (sym_p)
3423 fprintf (dump_file, "sym + ");
3424 if (var_p)
3425 fprintf (dump_file, "var + ");
3426 if (off_p)
3427 fprintf (dump_file, "cst + ");
3428 if (rat_p)
3429 fprintf (dump_file, "rat * ");
3431 acost = data->costs[sym_p][var_p][off_p][rat_p];
3432 fprintf (dump_file, "index costs %d\n", acost);
3434 if (has_predec[mem_mode] || has_postdec[mem_mode]
3435 || has_preinc[mem_mode] || has_postinc[mem_mode])
3436 fprintf (dump_file, " May include autoinc/dec\n");
3437 fprintf (dump_file, "\n");
3440 VEC_replace (address_cost_data, address_cost_data_list,
3441 data_index, data);
3444 bits = GET_MODE_BITSIZE (address_mode);
3445 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3446 offset &= mask;
3447 if ((offset >> (bits - 1) & 1))
3448 offset |= ~mask;
3449 s_offset = offset;
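/* The mask operations above sign-extend the low BITS bits of OFFSET;
e.g., with a 32-bit address mode, an offset of 0xffffffff becomes
the signed value -1. */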
3451 autoinc = false;
3452 msize = GET_MODE_SIZE (mem_mode);
3453 autoinc_offset = offset;
3454 if (stmt_after_inc)
3455 autoinc_offset += ratio * cstep;
3456 if (symbol_present || var_present || ratio != 1)
3457 autoinc = false;
3458 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3459 && msize == cstep)
3460 || (has_postdec[mem_mode] && autoinc_offset == 0
3461 && msize == -cstep)
3462 || (has_preinc[mem_mode] && autoinc_offset == msize
3463 && msize == cstep)
3464 || (has_predec[mem_mode] && autoinc_offset == -msize
3465 && msize == -cstep))
3466 autoinc = true;
3468 cost = 0;
3469 offset_p = (s_offset != 0
3470 && data->min_offset <= s_offset
3471 && s_offset <= data->max_offset);
3472 ratio_p = (ratio != 1
3473 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3475 if (ratio != 1 && !ratio_p)
3476 cost += multiply_by_cost (ratio, address_mode, speed);
3478 if (s_offset && !offset_p && !symbol_present)
3479 cost += add_cost (address_mode, speed);
3481 if (may_autoinc)
3482 *may_autoinc = autoinc;
3483 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3484 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3485 return new_cost (cost + acost, complexity);
3488 /* Estimates cost of forcing expression EXPR into a variable. */
3490 static comp_cost
3491 force_expr_to_var_cost (tree expr, bool speed)
3493 static bool costs_initialized = false;
3494 static unsigned integer_cost [2];
3495 static unsigned symbol_cost [2];
3496 static unsigned address_cost [2];
3497 tree op0, op1;
3498 comp_cost cost0, cost1, cost;
3499 enum machine_mode mode;
3501 if (!costs_initialized)
3503 tree type = build_pointer_type (integer_type_node);
3504 tree var, addr;
3505 rtx x;
3506 int i;
3508 var = create_tmp_var_raw (integer_type_node, "test_var");
3509 TREE_STATIC (var) = 1;
3510 x = produce_memory_decl_rtl (var, NULL);
3511 SET_DECL_RTL (var, x);
3513 addr = build1 (ADDR_EXPR, type, var);
3516 for (i = 0; i < 2; i++)
3518 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3519 2000), i);
3521 symbol_cost[i] = computation_cost (addr, i) + 1;
3523 address_cost[i]
3524 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3525 addr,
3526 build_int_cst (sizetype, 2000)), i) + 1;
3527 if (dump_file && (dump_flags & TDF_DETAILS))
3529 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3530 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3531 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3532 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3533 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3534 fprintf (dump_file, "\n");
3538 costs_initialized = true;
3541 STRIP_NOPS (expr);
3543 if (SSA_VAR_P (expr))
3544 return zero_cost;
3546 if (is_gimple_min_invariant (expr))
3548 if (TREE_CODE (expr) == INTEGER_CST)
3549 return new_cost (integer_cost [speed], 0);
3551 if (TREE_CODE (expr) == ADDR_EXPR)
3553 tree obj = TREE_OPERAND (expr, 0);
3555 if (TREE_CODE (obj) == VAR_DECL
3556 || TREE_CODE (obj) == PARM_DECL
3557 || TREE_CODE (obj) == RESULT_DECL)
3558 return new_cost (symbol_cost [speed], 0);
3561 return new_cost (address_cost [speed], 0);
3564 switch (TREE_CODE (expr))
3566 case POINTER_PLUS_EXPR:
3567 case PLUS_EXPR:
3568 case MINUS_EXPR:
3569 case MULT_EXPR:
3570 op0 = TREE_OPERAND (expr, 0);
3571 op1 = TREE_OPERAND (expr, 1);
3572 STRIP_NOPS (op0);
3573 STRIP_NOPS (op1);
3575 if (is_gimple_val (op0))
3576 cost0 = zero_cost;
3577 else
3578 cost0 = force_expr_to_var_cost (op0, speed);
3580 if (is_gimple_val (op1))
3581 cost1 = zero_cost;
3582 else
3583 cost1 = force_expr_to_var_cost (op1, speed);
3585 break;
3587 case NEGATE_EXPR:
3588 op0 = TREE_OPERAND (expr, 0);
3589 STRIP_NOPS (op0);
3590 op1 = NULL_TREE;
3592 if (is_gimple_val (op0))
3593 cost0 = zero_cost;
3594 else
3595 cost0 = force_expr_to_var_cost (op0, speed);
3597 cost1 = zero_cost;
3598 break;
3600 default:
3601 /* Just an arbitrary value, FIXME. */
3602 return new_cost (target_spill_cost[speed], 0);
3605 mode = TYPE_MODE (TREE_TYPE (expr));
3606 switch (TREE_CODE (expr))
3608 case POINTER_PLUS_EXPR:
3609 case PLUS_EXPR:
3610 case MINUS_EXPR:
3611 case NEGATE_EXPR:
3612 cost = new_cost (add_cost (mode, speed), 0);
3613 break;
3615 case MULT_EXPR:
3616 if (cst_and_fits_in_hwi (op0))
3617 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3618 else if (cst_and_fits_in_hwi (op1))
3619 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3620 else
3621 return new_cost (target_spill_cost [speed], 0);
3622 break;
3624 default:
3625 gcc_unreachable ();
3628 cost = add_costs (cost, cost0);
3629 cost = add_costs (cost, cost1);
3631 /* Bound the cost by target_spill_cost. The parts of complicated
3632 computations often are either loop invariant or at least can
3633 be shared between several iv uses, so letting this grow without
3634 limits would not give reasonable results. */
3635 if (cost.cost > (int) target_spill_cost [speed])
3636 cost.cost = target_spill_cost [speed];
3638 return cost;
3641 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3642 invariants the computation depends on. */
3644 static comp_cost
3645 force_var_cost (struct ivopts_data *data,
3646 tree expr, bitmap *depends_on)
3648 if (depends_on)
3650 fd_ivopts_data = data;
3651 walk_tree (&expr, find_depends, depends_on, NULL);
3654 return force_expr_to_var_cost (expr, data->speed);
3657 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3658 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3659 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3660 invariants the computation depends on. */
3662 static comp_cost
3663 split_address_cost (struct ivopts_data *data,
3664 tree addr, bool *symbol_present, bool *var_present,
3665 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3667 tree core;
3668 HOST_WIDE_INT bitsize;
3669 HOST_WIDE_INT bitpos;
3670 tree toffset;
3671 enum machine_mode mode;
3672 int unsignedp, volatilep;
3674 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3675 &unsignedp, &volatilep, false);
3677 if (toffset != 0
3678 || bitpos % BITS_PER_UNIT != 0
3679 || TREE_CODE (core) != VAR_DECL)
3681 *symbol_present = false;
3682 *var_present = true;
3683 fd_ivopts_data = data;
3684 walk_tree (&addr, find_depends, depends_on, NULL);
3685 return new_cost (target_spill_cost[data->speed], 0);
3688 *offset += bitpos / BITS_PER_UNIT;
3689 if (TREE_STATIC (core)
3690 || DECL_EXTERNAL (core))
3692 *symbol_present = true;
3693 *var_present = false;
3694 return zero_cost;
3697 *symbol_present = false;
3698 *var_present = true;
3699 return zero_cost;
3702 /* Estimates cost of expressing difference of addresses E1 - E2 as
3703 var + symbol + offset. The value of offset is added to OFFSET,
3704 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3705 part is missing. DEPENDS_ON is a set of the invariants the computation
3706 depends on. */
3708 static comp_cost
3709 ptr_difference_cost (struct ivopts_data *data,
3710 tree e1, tree e2, bool *symbol_present, bool *var_present,
3711 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3713 HOST_WIDE_INT diff = 0;
3714 aff_tree aff_e1, aff_e2;
3715 tree type;
3717 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3719 if (ptr_difference_const (e1, e2, &diff))
3721 *offset += diff;
3722 *symbol_present = false;
3723 *var_present = false;
3724 return zero_cost;
3727 if (integer_zerop (e2))
3728 return split_address_cost (data, TREE_OPERAND (e1, 0),
3729 symbol_present, var_present, offset, depends_on);
3731 *symbol_present = false;
3732 *var_present = true;
3734 type = signed_type_for (TREE_TYPE (e1));
3735 tree_to_aff_combination (e1, type, &aff_e1);
3736 tree_to_aff_combination (e2, type, &aff_e2);
3737 aff_combination_scale (&aff_e2, double_int_minus_one);
3738 aff_combination_add (&aff_e1, &aff_e2);
3740 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
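/* E.g., &a[4] - &a[1] is folded by ptr_difference_const above into a
compile-time constant, costing nothing; &a[i] - &b[0] instead falls
through to the affine-combination path and is costed as forcing the
difference into a variable. */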
3743 /* Estimates cost of expressing difference E1 - E2 as
3744 var + symbol + offset. The value of offset is added to OFFSET,
3745 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3746 part is missing. DEPENDS_ON is a set of the invariants the computation
3747 depends on. */
3749 static comp_cost
3750 difference_cost (struct ivopts_data *data,
3751 tree e1, tree e2, bool *symbol_present, bool *var_present,
3752 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3754 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3755 unsigned HOST_WIDE_INT off1, off2;
3756 aff_tree aff_e1, aff_e2;
3757 tree type;
3759 e1 = strip_offset (e1, &off1);
3760 e2 = strip_offset (e2, &off2);
3761 *offset += off1 - off2;
3763 STRIP_NOPS (e1);
3764 STRIP_NOPS (e2);
3766 if (TREE_CODE (e1) == ADDR_EXPR)
3767 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3768 offset, depends_on);
3769 *symbol_present = false;
3771 if (operand_equal_p (e1, e2, 0))
3773 *var_present = false;
3774 return zero_cost;
3777 *var_present = true;
3779 if (integer_zerop (e2))
3780 return force_var_cost (data, e1, depends_on);
3782 if (integer_zerop (e1))
3784 comp_cost cost = force_var_cost (data, e2, depends_on);
3785 cost.cost += multiply_by_cost (-1, mode, data->speed);
3786 return cost;
3789 type = signed_type_for (TREE_TYPE (e1));
3790 tree_to_aff_combination (e1, type, &aff_e1);
3791 tree_to_aff_combination (e2, type, &aff_e2);
3792 aff_combination_scale (&aff_e2, double_int_minus_one);
3793 aff_combination_add (&aff_e1, &aff_e2);
3795 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3798 /* Returns true if AFF1 and AFF2 are identical. */
3800 static bool
3801 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3803 unsigned i;
3805 if (aff1->n != aff2->n)
3806 return false;
3808 for (i = 0; i < aff1->n; i++)
3810 if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
3811 return false;
3813 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3814 return false;
3816 return true;
3819 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3820 requires a new compiler generated temporary. Returns -1 otherwise.
3821 ADDRESS_P is a flag indicating if the expression is for address
3822 computation. */
3824 static int
3825 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3826 tree cbase, HOST_WIDE_INT ratio,
3827 bool address_p)
3829 aff_tree ubase_aff, cbase_aff;
3830 tree expr, ub, cb;
3831 struct iv_inv_expr_ent ent;
3832 struct iv_inv_expr_ent **slot;
3834 STRIP_NOPS (ubase);
3835 STRIP_NOPS (cbase);
3836 ub = ubase;
3837 cb = cbase;
3839 if ((TREE_CODE (ubase) == INTEGER_CST)
3840 && (TREE_CODE (cbase) == INTEGER_CST))
3841 return -1;
3843 /* Strips the constant part. */
3844 if (TREE_CODE (ubase) == PLUS_EXPR
3845 || TREE_CODE (ubase) == MINUS_EXPR
3846 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3848 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3849 ubase = TREE_OPERAND (ubase, 0);
3852 /* Strips the constant part. */
3853 if (TREE_CODE (cbase) == PLUS_EXPR
3854 || TREE_CODE (cbase) == MINUS_EXPR
3855 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3857 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3858 cbase = TREE_OPERAND (cbase, 0);
3861 if (address_p)
3863 if (((TREE_CODE (ubase) == SSA_NAME)
3864 || (TREE_CODE (ubase) == ADDR_EXPR
3865 && is_gimple_min_invariant (ubase)))
3866 && (TREE_CODE (cbase) == INTEGER_CST))
3867 return -1;
3869 if (((TREE_CODE (cbase) == SSA_NAME)
3870 || (TREE_CODE (cbase) == ADDR_EXPR
3871 && is_gimple_min_invariant (cbase)))
3872 && (TREE_CODE (ubase) == INTEGER_CST))
3873 return -1;
3876 if (ratio == 1)
3878 if (operand_equal_p (ubase, cbase, 0))
3879 return -1;
3881 if (TREE_CODE (ubase) == ADDR_EXPR
3882 && TREE_CODE (cbase) == ADDR_EXPR)
3884 tree usym, csym;
3886 usym = TREE_OPERAND (ubase, 0);
3887 csym = TREE_OPERAND (cbase, 0);
3888 if (TREE_CODE (usym) == ARRAY_REF)
3890 tree ind = TREE_OPERAND (usym, 1);
3891 if (TREE_CODE (ind) == INTEGER_CST
3892 && host_integerp (ind, 0)
3893 && TREE_INT_CST_LOW (ind) == 0)
3894 usym = TREE_OPERAND (usym, 0);
3896 if (TREE_CODE (csym) == ARRAY_REF)
3898 tree ind = TREE_OPERAND (csym, 1);
3899 if (TREE_CODE (ind) == INTEGER_CST
3900 && host_integerp (ind, 0)
3901 && TREE_INT_CST_LOW (ind) == 0)
3902 csym = TREE_OPERAND (csym, 0);
3904 if (operand_equal_p (usym, csym, 0))
3905 return -1;
3907 /* Now do a more complex comparison. */
3908 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3909 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3910 if (compare_aff_trees (&ubase_aff, &cbase_aff))
3911 return -1;
3914 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3915 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3917 aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
3918 aff_combination_add (&ubase_aff, &cbase_aff);
3919 expr = aff_combination_to_tree (&ubase_aff);
3920 ent.expr = expr;
3921 ent.hash = iterative_hash_expr (expr, 0);
3922 slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3923 &ent, INSERT);
3924 if (*slot)
3925 return (*slot)->id;
3927 *slot = XNEW (struct iv_inv_expr_ent);
3928 (*slot)->expr = expr;
3929 (*slot)->hash = ent.hash;
3930 (*slot)->id = data->inv_expr_id++;
3931 return (*slot)->id;
3936 /* Determines the cost of the computation by which USE is expressed
3937 from induction variable CAND. If ADDRESS_P is true, we just need
3938 to create an address from it, otherwise we want to get it into a
3939 register. A set of invariants we depend on is stored in
3940 DEPENDS_ON. AT is the statement at which the value is computed.
3941 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3942 addressing is likely. */
3944 static comp_cost
3945 get_computation_cost_at (struct ivopts_data *data,
3946 struct iv_use *use, struct iv_cand *cand,
3947 bool address_p, bitmap *depends_on, gimple at,
3948 bool *can_autoinc,
3949 int *inv_expr_id)
3951 tree ubase = use->iv->base, ustep = use->iv->step;
3952 tree cbase, cstep;
3953 tree utype = TREE_TYPE (ubase), ctype;
3954 unsigned HOST_WIDE_INT cstepi, offset = 0;
3955 HOST_WIDE_INT ratio, aratio;
3956 bool var_present, symbol_present, stmt_is_after_inc;
3957 comp_cost cost;
3958 double_int rat;
3959 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3961 *depends_on = NULL;
3963 /* Only consider real candidates. */
3964 if (!cand->iv)
3965 return infinite_cost;
3967 cbase = cand->iv->base;
3968 cstep = cand->iv->step;
3969 ctype = TREE_TYPE (cbase);
3971 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3973 /* We do not have a precision to express the values of use. */
3974 return infinite_cost;
3977 if (address_p)
3979 /* Do not try to express address of an object with computation based
3980 on address of a different object. This may cause problems in rtl
3981 level alias analysis (that does not expect this to be happening,
3982 as this is illegal in C), and would be unlikely to be useful
3983 anyway. */
3984 if (use->iv->base_object
3985 && cand->iv->base_object
3986 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3987 return infinite_cost;
3990 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3992 /* TODO -- add direct handling of this case. */
3993 goto fallback;
3996 /* CSTEPI is removed from the offset in case the statement is after the
3997 increment. If the step is not constant, we use zero instead.
3998 This is a bit imprecise (there is an extra addition), but
3999 redundancy elimination is likely to transform the code so that
4000 it uses the value of the variable before the increment anyway,
4001 so this is not too unrealistic. */
4002 if (cst_and_fits_in_hwi (cstep))
4003 cstepi = int_cst_value (cstep);
4004 else
4005 cstepi = 0;
4007 if (!constant_multiple_of (ustep, cstep, &rat))
4008 return infinite_cost;
4010 if (double_int_fits_in_shwi_p (rat))
4011 ratio = double_int_to_shwi (rat);
4012 else
4013 return infinite_cost;
4015 STRIP_NOPS (cbase);
4016 ctype = TREE_TYPE (cbase);
4018 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4019 or ratio == 1, it is better to handle this like
4021 ubase - ratio * cbase + ratio * var
4023 (this also holds in the case ratio == -1, TODO). */
4025 if (cst_and_fits_in_hwi (cbase))
4027 offset = - ratio * int_cst_value (cbase);
4028 cost = difference_cost (data,
4029 ubase, build_int_cst (utype, 0),
4030 &symbol_present, &var_present, &offset,
4031 depends_on);
4032 cost.cost /= avg_loop_niter (data->current_loop);
4034 else if (ratio == 1)
4036 cost = difference_cost (data,
4037 ubase, cbase,
4038 &symbol_present, &var_present, &offset,
4039 depends_on);
4040 cost.cost /= avg_loop_niter (data->current_loop);
4042 else if (address_p
4043 && !POINTER_TYPE_P (ctype)
4044 && multiplier_allowed_in_address_p
4045 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4046 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4048 cbase
4049 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4050 cost = difference_cost (data,
4051 ubase, cbase,
4052 &symbol_present, &var_present, &offset,
4053 depends_on);
4054 cost.cost /= avg_loop_niter (data->current_loop);
4056 else
4058 cost = force_var_cost (data, cbase, depends_on);
4059 cost = add_costs (cost,
4060 difference_cost (data,
4061 ubase, build_int_cst (utype, 0),
4062 &symbol_present, &var_present,
4063 &offset, depends_on));
4064 cost.cost /= avg_loop_niter (data->current_loop);
4065 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
4068 if (inv_expr_id)
4070 *inv_expr_id =
4071 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4072 /* Clear DEPENDS_ON. */
4073 if (*inv_expr_id != -1 && depends_on && *depends_on)
4074 bitmap_clear (*depends_on);
4077 /* If we are after the increment, the value of the candidate is higher by
4078 one iteration. */
4079 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4080 if (stmt_is_after_inc)
4081 offset -= ratio * cstepi;
4083 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4084 (symbol/var1/const parts may be omitted). If we are looking for an
4085 address, find the cost of addressing this. */
4086 if (address_p)
4087 return add_costs (cost,
4088 get_address_cost (symbol_present, var_present,
4089 offset, ratio, cstepi,
4090 TYPE_MODE (TREE_TYPE (utype)),
4091 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4092 speed, stmt_is_after_inc,
4093 can_autoinc));
4095 /* Otherwise estimate the costs for computing the expression. */
4096 if (!symbol_present && !var_present && !offset)
4098 if (ratio != 1)
4099 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
4100 return cost;
4103 /* Symbol + offset should be compile-time computable, so consider that they
4104 are added once to the variable, if present. */
4105 if (var_present && (symbol_present || offset))
4106 cost.cost += adjust_setup_cost (data,
4107 add_cost (TYPE_MODE (ctype), speed));
4109 /* Having offset does not affect runtime cost in case it is added to
4110 symbol, but it increases complexity. */
4111 if (offset)
4112 cost.complexity++;
4114 cost.cost += add_cost (TYPE_MODE (ctype), speed);
4116 aratio = ratio > 0 ? ratio : -ratio;
4117 if (aratio != 1)
4118 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
4119 return cost;
4121 fallback:
4122 if (can_autoinc)
4123 *can_autoinc = false;
4126 /* Just get the expression, expand it and measure the cost. */
4127 tree comp = get_computation_at (data->current_loop, use, cand, at);
4129 if (!comp)
4130 return infinite_cost;
4132 if (address_p)
4133 comp = build_simple_mem_ref (comp);
4135 return new_cost (computation_cost (comp, speed), 0);
4139 /* Determines the cost of the computation by which USE is expressed
4140 from induction variable CAND. If ADDRESS_P is true, we just need
4141 to create an address from it, otherwise we want to get it into a
4142 register. A set of invariants we depend on is stored in
4143 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4144 autoinc addressing is likely. */
4146 static comp_cost
4147 get_computation_cost (struct ivopts_data *data,
4148 struct iv_use *use, struct iv_cand *cand,
4149 bool address_p, bitmap *depends_on,
4150 bool *can_autoinc, int *inv_expr_id)
4152 return get_computation_cost_at (data,
4153 use, cand, address_p, depends_on, use->stmt,
4154 can_autoinc, inv_expr_id);
4157 /* Determines cost of basing replacement of USE on CAND in a generic
4158 expression. */
4160 static bool
4161 determine_use_iv_cost_generic (struct ivopts_data *data,
4162 struct iv_use *use, struct iv_cand *cand)
4164 bitmap depends_on;
4165 comp_cost cost;
4166 int inv_expr_id = -1;
4168 /* The simple case first -- if we need to express value of the preserved
4169 original biv, the cost is 0. This also prevents us from counting the
4170 cost of increment twice -- once at this use and once in the cost of
4171 the candidate. */
4172 if (cand->pos == IP_ORIGINAL
4173 && cand->incremented_at == use->stmt)
4175 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE, -1);
4176 return true;
4179 cost = get_computation_cost (data, use, cand, false, &depends_on,
4180 NULL, &inv_expr_id);
4182 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4183 inv_expr_id);
4185 return !infinite_cost_p (cost);
4188 /* Determines cost of basing replacement of USE on CAND in an address. */
4190 static bool
4191 determine_use_iv_cost_address (struct ivopts_data *data,
4192 struct iv_use *use, struct iv_cand *cand)
4194 bitmap depends_on;
4195 bool can_autoinc;
4196 int inv_expr_id = -1;
4197 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4198 &can_autoinc, &inv_expr_id);
4200 if (cand->ainc_use == use)
4202 if (can_autoinc)
4203 cost.cost -= cand->cost_step;
4204 /* If we generated the candidate solely for exploiting autoincrement
4205 opportunities, and it turns out it can't be used, set the cost to
4206 infinity to make sure we ignore it. */
4207 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4208 cost = infinite_cost;
4210 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4211 inv_expr_id);
4213 return !infinite_cost_p (cost);
4216 /* Computes the value of candidate CAND at position AT in iteration NITER,
4217 and stores it in VAL. */
4219 static void
4220 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4221 aff_tree *val)
4223 aff_tree step, delta, nit;
4224 struct iv *iv = cand->iv;
4225 tree type = TREE_TYPE (iv->base);
4226 tree steptype = type;
4227 if (POINTER_TYPE_P (type))
4228 steptype = sizetype;
4230 tree_to_aff_combination (iv->step, steptype, &step);
4231 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4232 aff_combination_convert (&nit, steptype);
4233 aff_combination_mult (&nit, &step, &delta);
4234 if (stmt_after_increment (loop, cand, at))
4235 aff_combination_add (&delta, &step);
4237 tree_to_aff_combination (iv->base, type, val);
4238 aff_combination_add (val, &delta);
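
/* Illustrative sketch, not part of the pass: the affine computation
   above reduced to plain 32-bit unsigned arithmetic.  BASE, STEP and
   NITER stand in for the corresponding trees; AFTER_INC mirrors
   stmt_after_increment.  All names here are hypothetical.  */

static unsigned
cand_value_at_sketch (unsigned base, unsigned step, unsigned niter,
                      int after_inc)
{
  unsigned delta = niter * step;   /* aff_combination_mult  */
  if (after_inc)
    delta += step;                 /* one extra increment already executed  */
  return base + delta;             /* wraps modulo 2^32, like the iv  */
}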
4241 /* Returns the period of induction variable IV. */
4243 static tree
4244 iv_period (struct iv *iv)
4246 tree step = iv->step, period, type;
4247 tree pow2div;
4249 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4251 type = unsigned_type_for (TREE_TYPE (step));
4252 /* The period of the iv is lcm (step, type_range) / step - 1,
4253 i.e., N * type_range / step - 1. Since the type range is a power
4254 of two, N == step >> num_of_ending_zeros_binary (step),
4255 so the final result is
4257 (type_range >> num_of_ending_zeros_binary (step)) - 1. */
4260 pow2div = num_ending_zeros (step);
4262 period = build_low_bits_mask (type,
4263 (TYPE_PRECISION (type)
4264 - tree_low_cst (pow2div, 1)));
4266 return period;
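
/* A minimal standalone sketch of the formula above, assuming a 32-bit
   unsigned iv type and a nonzero STEP, using plain C arithmetic
   instead of the tree machinery.  For step == 12 (two trailing zero
   bits) the result is 0x3fffffff: after 2^30 increments the iv wraps
   back to its initial value.  */

static unsigned
iv_period_sketch (unsigned step)
{
  unsigned tz = 0;                 /* num_ending_zeros (step)  */
  while ((step & 1) == 0)
    {
      step >>= 1;
      tz++;
    }
  /* build_low_bits_mask (type, precision - tz) for precision == 32.  */
  return 0xffffffffu >> tz;
}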
4269 /* Returns the comparison operator used when eliminating the iv USE. */
4271 static enum tree_code
4272 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4274 struct loop *loop = data->current_loop;
4275 basic_block ex_bb;
4276 edge exit;
4278 ex_bb = gimple_bb (use->stmt);
4279 exit = EDGE_SUCC (ex_bb, 0);
4280 if (flow_bb_inside_loop_p (loop, exit->dest))
4281 exit = EDGE_SUCC (ex_bb, 1);
4283 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4286 /* Check whether it is possible to express the condition in USE by comparison
4287 of candidate CAND. If so, store the value to compare against in BOUND. */
4289 static bool
4290 may_eliminate_iv (struct ivopts_data *data,
4291 struct iv_use *use, struct iv_cand *cand, tree *bound)
4293 basic_block ex_bb;
4294 edge exit;
4295 tree nit, period;
4296 struct loop *loop = data->current_loop;
4297 aff_tree bnd;
4298 struct tree_niter_desc *desc = NULL;
4300 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4301 return false;
4303 /* For now this works only for exits that dominate the loop latch.
4304 TODO: extend to other conditions inside loop body. */
4305 ex_bb = gimple_bb (use->stmt);
4306 if (use->stmt != last_stmt (ex_bb)
4307 || gimple_code (use->stmt) != GIMPLE_COND
4308 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4309 return false;
4311 exit = EDGE_SUCC (ex_bb, 0);
4312 if (flow_bb_inside_loop_p (loop, exit->dest))
4313 exit = EDGE_SUCC (ex_bb, 1);
4314 if (flow_bb_inside_loop_p (loop, exit->dest))
4315 return false;
4317 nit = niter_for_exit (data, exit, &desc);
4318 if (!nit)
4319 return false;
4321 /* Determine whether we can use the variable to test the exit condition.
4322 This is the case iff the period of the induction variable is greater
4323 than the number of iterations for which the exit condition is true. */
4324 period = iv_period (cand->iv);
4326 /* If the number of iterations is constant, compare against it directly. */
4327 if (TREE_CODE (nit) == INTEGER_CST)
4329 /* See cand_value_at. */
4330 if (stmt_after_increment (loop, cand, use->stmt))
4332 if (!tree_int_cst_lt (nit, period))
4333 return false;
4335 else
4337 if (tree_int_cst_lt (period, nit))
4338 return false;
4342 /* If not, and if this is the only possible exit of the loop, see whether
4343 we can get a conservative estimate on the number of iterations of the
4344 entire loop and compare against that instead. */
4345 else
4347 double_int period_value, max_niter;
4349 max_niter = desc->max;
4350 if (stmt_after_increment (loop, cand, use->stmt))
4351 max_niter = double_int_add (max_niter, double_int_one);
4352 period_value = tree_to_double_int (period);
4353 if (double_int_ucmp (max_niter, period_value) > 0)
4355 /* See if we can take advantage of inferred loop bound information. */
4356 if (loop_only_exit_p (loop, exit))
4358 if (!estimated_loop_iterations (loop, true, &max_niter))
4359 return false;
4360 /* The loop bound is already adjusted by adding 1. */
4361 if (double_int_ucmp (max_niter, period_value) > 0)
4362 return false;
4364 else
4365 return false;
4369 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4371 *bound = aff_combination_to_tree (&bnd);
4372 /* It is unlikely that computing the number of iterations using division
4373 would be more profitable than keeping the original induction variable. */
4374 if (expression_expensive_p (*bound))
4375 return false;
4376 return true;
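
/* Worked example of the period check above: an 8-bit unsigned iv with
   step 1 has period 255, so for a loop known to run 300 iterations the
   iv wraps before reaching any bound and the exit test cannot be
   rewritten; the period/niter comparisons reject exactly this case.  */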
4380 /* Determines cost of basing replacement of USE on CAND in a condition. */
4382 static bool
4383 determine_use_iv_cost_condition (struct ivopts_data *data,
4384 struct iv_use *use, struct iv_cand *cand)
4386 tree bound = NULL_TREE;
4387 struct iv *cmp_iv;
4388 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4389 comp_cost elim_cost, express_cost, cost;
4390 bool ok;
4391 int inv_expr_id = -1;
4392 tree *control_var, *bound_cst;
4394 /* Only consider real candidates. */
4395 if (!cand->iv)
4397 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, -1);
4398 return false;
4401 /* Try iv elimination. */
4402 if (may_eliminate_iv (data, use, cand, &bound))
4404 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4405 /* The bound is a loop invariant, so it will be computed only
4406 once. */
4407 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4409 else
4410 elim_cost = infinite_cost;
4412 /* Try expressing the original giv. If it is compared with an invariant,
4413 note that we cannot get rid of it. */
4414 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4415 NULL, &cmp_iv);
4416 gcc_assert (ok);
4418 /* When the condition is a comparison of the candidate IV against
4419 zero, prefer this IV.
4421 TODO: The constant that we're subtracting from the cost should
4422 be target-dependent. This information should be added to the
4423 target costs for each backend. */
4424 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4425 && integer_zerop (*bound_cst)
4426 && (operand_equal_p (*control_var, cand->var_after, 0)
4427 || operand_equal_p (*control_var, cand->var_before, 0)))
4428 elim_cost.cost -= 1;
4430 express_cost = get_computation_cost (data, use, cand, false,
4431 &depends_on_express, NULL,
4432 &inv_expr_id);
4433 fd_ivopts_data = data;
4434 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4436 /* Choose the better approach, preferring the eliminated IV. */
4437 if (compare_costs (elim_cost, express_cost) <= 0)
4439 cost = elim_cost;
4440 depends_on = depends_on_elim;
4441 depends_on_elim = NULL;
4443 else
4445 cost = express_cost;
4446 depends_on = depends_on_express;
4447 depends_on_express = NULL;
4448 bound = NULL_TREE;
4451 set_use_iv_cost (data, use, cand, cost, depends_on, bound, inv_expr_id);
4453 if (depends_on_elim)
4454 BITMAP_FREE (depends_on_elim);
4455 if (depends_on_express)
4456 BITMAP_FREE (depends_on_express);
4458 return !infinite_cost_p (cost);
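
/* Example of the elimination/expression choice above: for a loop exit
   test "i < n" and a pointer candidate walking the array, elimination
   rewrites the test to compare the pointer against a bound precomputed
   before the loop (see cand_value_at), while expressing keeps a counter
   alive just for the comparison; the cheaper variant is recorded.  */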
4461 /* Determines cost of basing replacement of USE on CAND. Returns false
4462 if USE cannot be based on CAND. */
4464 static bool
4465 determine_use_iv_cost (struct ivopts_data *data,
4466 struct iv_use *use, struct iv_cand *cand)
4468 switch (use->type)
4470 case USE_NONLINEAR_EXPR:
4471 return determine_use_iv_cost_generic (data, use, cand);
4473 case USE_ADDRESS:
4474 return determine_use_iv_cost_address (data, use, cand);
4476 case USE_COMPARE:
4477 return determine_use_iv_cost_condition (data, use, cand);
4479 default:
4480 gcc_unreachable ();
4484 /* Return true if get_computation_cost indicates that autoincrement is
4485 a possibility for the pair of USE and CAND, false otherwise. */
4487 static bool
4488 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4489 struct iv_cand *cand)
4491 bitmap depends_on;
4492 bool can_autoinc;
4493 comp_cost cost;
4495 if (use->type != USE_ADDRESS)
4496 return false;
4498 cost = get_computation_cost (data, use, cand, true, &depends_on,
4499 &can_autoinc, NULL);
4501 BITMAP_FREE (depends_on);
4503 return !infinite_cost_p (cost) && can_autoinc;
4506 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4507 use that allows autoincrement, and set their AINC_USE if possible. */
4509 static void
4510 set_autoinc_for_original_candidates (struct ivopts_data *data)
4512 unsigned i, j;
4514 for (i = 0; i < n_iv_cands (data); i++)
4516 struct iv_cand *cand = iv_cand (data, i);
4517 struct iv_use *closest = NULL;
4518 if (cand->pos != IP_ORIGINAL)
4519 continue;
4520 for (j = 0; j < n_iv_uses (data); j++)
4522 struct iv_use *use = iv_use (data, j);
4523 unsigned uid = gimple_uid (use->stmt);
4524 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4525 || uid > gimple_uid (cand->incremented_at))
4526 continue;
4527 if (closest == NULL || uid > gimple_uid (closest->stmt))
4528 closest = use;
4530 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4531 continue;
4532 cand->ainc_use = closest;
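
/* Example: on targets with auto-increment addressing, a memory access
   through the iv immediately followed by its increment in the same
   block (e.g. "... = *p; p = p + 4;") lets the increment be folded
   into the access; AINC_USE records the use enabling that folding.  */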
4536 /* Finds the candidates for the induction variables. */
4538 static void
4539 find_iv_candidates (struct ivopts_data *data)
4541 /* Add commonly used ivs. */
4542 add_standard_iv_candidates (data);
4544 /* Add old induction variables. */
4545 add_old_ivs_candidates (data);
4547 /* Add induction variables derived from uses. */
4548 add_derived_ivs_candidates (data);
4550 set_autoinc_for_original_candidates (data);
4552 /* Record the important candidates. */
4553 record_important_candidates (data);
4556 /* Determines costs of basing the use of the iv on an iv candidate. */
4558 static void
4559 determine_use_iv_costs (struct ivopts_data *data)
4561 unsigned i, j;
4562 struct iv_use *use;
4563 struct iv_cand *cand;
4564 bitmap to_clear = BITMAP_ALLOC (NULL);
4566 alloc_use_cost_map (data);
4568 for (i = 0; i < n_iv_uses (data); i++)
4570 use = iv_use (data, i);
4572 if (data->consider_all_candidates)
4574 for (j = 0; j < n_iv_cands (data); j++)
4576 cand = iv_cand (data, j);
4577 determine_use_iv_cost (data, use, cand);
4580 else
4582 bitmap_iterator bi;
4584 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4586 cand = iv_cand (data, j);
4587 if (!determine_use_iv_cost (data, use, cand))
4588 bitmap_set_bit (to_clear, j);
4591 /* Remove the candidates for which the cost is infinite from
4592 the list of related candidates. */
4593 bitmap_and_compl_into (use->related_cands, to_clear);
4594 bitmap_clear (to_clear);
4598 BITMAP_FREE (to_clear);
4600 if (dump_file && (dump_flags & TDF_DETAILS))
4602 fprintf (dump_file, "Use-candidate costs:\n");
4604 for (i = 0; i < n_iv_uses (data); i++)
4606 use = iv_use (data, i);
4608 fprintf (dump_file, "Use %d:\n", i);
4609 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4610 for (j = 0; j < use->n_map_members; j++)
4612 if (!use->cost_map[j].cand
4613 || infinite_cost_p (use->cost_map[j].cost))
4614 continue;
4616 fprintf (dump_file, " %d\t%d\t%d\t",
4617 use->cost_map[j].cand->id,
4618 use->cost_map[j].cost.cost,
4619 use->cost_map[j].cost.complexity);
4620 if (use->cost_map[j].depends_on)
4621 bitmap_print (dump_file,
4622 use->cost_map[j].depends_on, "","");
4623 if (use->cost_map[j].inv_expr_id != -1)
4624 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4625 fprintf (dump_file, "\n");
4628 fprintf (dump_file, "\n");
4630 fprintf (dump_file, "\n");
4634 /* Determines cost of the candidate CAND. */
4636 static void
4637 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4639 comp_cost cost_base;
4640 unsigned cost, cost_step;
4641 tree base;
4643 if (!cand->iv)
4645 cand->cost = 0;
4646 return;
4649 /* There are two costs associated with the candidate -- its increment
4650 and its initialization. The second is almost negligible for any loop
4651 that rolls enough, so we give it only very little weight. */
4653 base = cand->iv->base;
4654 cost_base = force_var_cost (data, base, NULL);
4655 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4657 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4659 /* Prefer the original ivs unless we may gain something by replacing them.
4660 The reason is to make debugging simpler; this is therefore not relevant
4661 for artificial ivs created by other optimization passes. */
4662 if (cand->pos != IP_ORIGINAL
4663 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4664 cost++;
4666 /* Prefer not to insert statements into the latch unless there are some
4667 already (so that we do not create unnecessary jumps). */
4668 if (cand->pos == IP_END
4669 && empty_block_p (ip_end_pos (data->current_loop)))
4670 cost++;
4672 cand->cost = cost;
4673 cand->cost_step = cost_step;
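
/* Hypothetical standalone sketch of the cost shape computed above: the
   step cost is paid on every iteration, while the setup cost is
   amortized over an assumed average iteration count, as
   adjust_setup_cost does when optimizing for speed.  AVG_NITER is
   assumed nonzero; both inputs are made-up scalars, not target hooks.  */

static unsigned
cand_cost_sketch (unsigned cost_base, unsigned cost_step,
                  unsigned avg_niter)
{
  return cost_step + cost_base / avg_niter;
}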
4676 /* Determines costs of computation of the candidates. */
4678 static void
4679 determine_iv_costs (struct ivopts_data *data)
4681 unsigned i;
4683 if (dump_file && (dump_flags & TDF_DETAILS))
4685 fprintf (dump_file, "Candidate costs:\n");
4686 fprintf (dump_file, " cand\tcost\n");
4689 for (i = 0; i < n_iv_cands (data); i++)
4691 struct iv_cand *cand = iv_cand (data, i);
4693 determine_iv_cost (data, cand);
4695 if (dump_file && (dump_flags & TDF_DETAILS))
4696 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4699 if (dump_file && (dump_flags & TDF_DETAILS))
4700 fprintf (dump_file, "\n");
4703 /* Calculates the cost of having SIZE induction variables. */
4705 static unsigned
4706 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4708 /* We add size to the cost, so that we prefer eliminating ivs
4709 if possible. */
4710 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
4711 data->body_includes_call);
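
/* A toy model in the spirit of the computation above (the real
   estimate_reg_pressure_cost is target- and speed-dependent): once the
   ivs plus other live values exceed the available registers, every
   extra value is charged a made-up spill cost.  All parameters here
   are hypothetical.  */

static unsigned
global_cost_for_size_sketch (unsigned size, unsigned regs_used,
                             unsigned avail_regs, unsigned spill_cost)
{
  unsigned pressure = regs_used + size;
  unsigned penalty = pressure > avail_regs
                     ? (pressure - avail_regs) * spill_cost : 0;
  /* SIZE itself is added so that smaller iv sets are preferred.  */
  return size + penalty;
}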
4714 /* For each size of the induction variable set determine the penalty. */
4716 static void
4717 determine_set_costs (struct ivopts_data *data)
4719 unsigned j, n;
4720 gimple phi;
4721 gimple_stmt_iterator psi;
4722 tree op;
4723 struct loop *loop = data->current_loop;
4724 bitmap_iterator bi;
4726 if (dump_file && (dump_flags & TDF_DETAILS))
4728 fprintf (dump_file, "Global costs:\n");
4729 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4730 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
4731 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4732 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4735 n = 0;
4736 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4738 phi = gsi_stmt (psi);
4739 op = PHI_RESULT (phi);
4741 if (!is_gimple_reg (op))
4742 continue;
4744 if (get_iv (data, op))
4745 continue;
4747 n++;
4750 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4752 struct version_info *info = ver_info (data, j);
4754 if (info->inv_id && info->has_nonlin_use)
4755 n++;
4758 data->regs_used = n;
4759 if (dump_file && (dump_flags & TDF_DETAILS))
4760 fprintf (dump_file, " regs_used %d\n", n);
4762 if (dump_file && (dump_flags & TDF_DETAILS))
4764 fprintf (dump_file, " cost for size:\n");
4765 fprintf (dump_file, " ivs\tcost\n");
4766 for (j = 0; j <= 2 * target_avail_regs; j++)
4767 fprintf (dump_file, " %d\t%d\n", j,
4768 ivopts_global_cost_for_size (data, j));
4769 fprintf (dump_file, "\n");
4773 /* Returns true if A is a cheaper cost pair than B. */
4775 static bool
4776 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4778 int cmp;
4780 if (!a)
4781 return false;
4783 if (!b)
4784 return true;
4786 cmp = compare_costs (a->cost, b->cost);
4787 if (cmp < 0)
4788 return true;
4790 if (cmp > 0)
4791 return false;
4793 /* In case the costs are the same, prefer the cheaper candidate. */
4794 if (a->cand->cost < b->cand->cost)
4795 return true;
4797 return false;
4801 /* Returns the candidate by which USE is expressed in IVS. */
4803 static struct cost_pair *
4804 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4806 return ivs->cand_for_use[use->id];
4810 /* Returns the number of temps needed for new loop invariant
4811 expressions. */
4813 static int
4814 iv_ca_get_num_inv_exprs (struct ivopts_data *data, struct iv_ca *ivs)
4816 unsigned i, n = 0;
4817 unsigned *used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
4819 for (i = 0; i < ivs->upto; i++)
4821 struct iv_use *use = iv_use (data, i);
4822 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
4823 if (cp && cp->inv_expr_id != -1)
4825 used_inv_expr[cp->inv_expr_id]++;
4826 if (used_inv_expr[cp->inv_expr_id] == 1)
4827 n++;
4831 free (used_inv_expr);
4832 return n;
4835 /* Computes the cost field of the IVS structure. */
4837 static void
4838 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4840 unsigned n_inv_exprs = 0;
4841 comp_cost cost = ivs->cand_use_cost;
4843 cost.cost += ivs->cand_cost;
4845 n_inv_exprs = iv_ca_get_num_inv_exprs (data, ivs);
4846 cost.cost += ivopts_global_cost_for_size (data,
4847 ivs->n_regs + n_inv_exprs);
4849 ivs->cost = cost;
4852 /* Remove the invariants in set INVS from set IVS. */
4854 static void
4855 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4857 bitmap_iterator bi;
4858 unsigned iid;
4860 if (!invs)
4861 return;
4863 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4865 ivs->n_invariant_uses[iid]--;
4866 if (ivs->n_invariant_uses[iid] == 0)
4867 ivs->n_regs--;
4871 /* Set USE not to be expressed by any candidate in IVS. */
4873 static void
4874 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4875 struct iv_use *use)
4877 unsigned uid = use->id, cid;
4878 struct cost_pair *cp;
4880 cp = ivs->cand_for_use[uid];
4881 if (!cp)
4882 return;
4883 cid = cp->cand->id;
4885 ivs->bad_uses++;
4886 ivs->cand_for_use[uid] = NULL;
4887 ivs->n_cand_uses[cid]--;
4889 if (ivs->n_cand_uses[cid] == 0)
4891 bitmap_clear_bit (ivs->cands, cid);
4892 /* Do not count the pseudocandidates. */
4893 if (cp->cand->iv)
4894 ivs->n_regs--;
4895 ivs->n_cands--;
4896 ivs->cand_cost -= cp->cand->cost;
4898 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4901 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4903 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4904 iv_ca_recount_cost (data, ivs);
4909 /* Add the invariants in set INVS to set IVS. */
4909 static void
4910 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4912 bitmap_iterator bi;
4913 unsigned iid;
4915 if (!invs)
4916 return;
4918 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4920 ivs->n_invariant_uses[iid]++;
4921 if (ivs->n_invariant_uses[iid] == 1)
4922 ivs->n_regs++;
4926 /* Set cost pair for USE in set IVS to CP. */
4928 static void
4929 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4930 struct iv_use *use, struct cost_pair *cp)
4932 unsigned uid = use->id, cid;
4934 if (ivs->cand_for_use[uid] == cp)
4935 return;
4937 if (ivs->cand_for_use[uid])
4938 iv_ca_set_no_cp (data, ivs, use);
4940 if (cp)
4942 cid = cp->cand->id;
4944 ivs->bad_uses--;
4945 ivs->cand_for_use[uid] = cp;
4946 ivs->n_cand_uses[cid]++;
4947 if (ivs->n_cand_uses[cid] == 1)
4949 bitmap_set_bit (ivs->cands, cid);
4950 /* Do not count the pseudocandidates. */
4951 if (cp->cand->iv)
4952 ivs->n_regs++;
4953 ivs->n_cands++;
4954 ivs->cand_cost += cp->cand->cost;
4956 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4959 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4960 iv_ca_set_add_invariants (ivs, cp->depends_on);
4961 iv_ca_recount_cost (data, ivs);
4965 /* Extend set IVS by expressing USE by some of the candidates in it
4966 if possible. All important candidates will be considered
4967 if IMPORTANT_CANDIDATES is true. */
4969 static void
4970 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4971 struct iv_use *use, bool important_candidates)
4973 struct cost_pair *best_cp = NULL, *cp;
4974 bitmap_iterator bi;
4975 bitmap cands;
4976 unsigned i;
4978 gcc_assert (ivs->upto >= use->id);
4980 if (ivs->upto == use->id)
4982 ivs->upto++;
4983 ivs->bad_uses++;
4986 cands = (important_candidates ? data->important_candidates : ivs->cands);
4987 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
4989 struct iv_cand *cand = iv_cand (data, i);
4991 cp = get_use_iv_cost (data, use, cand);
4993 if (cheaper_cost_pair (cp, best_cp))
4994 best_cp = cp;
4997 iv_ca_set_cp (data, ivs, use, best_cp);
5000 /* Get cost for assignment IVS. */
5002 static comp_cost
5003 iv_ca_cost (struct iv_ca *ivs)
5005 /* This was a conditional expression but it triggered a bug in
5006 Sun C 5.5. */
5007 if (ivs->bad_uses)
5008 return infinite_cost;
5009 else
5010 return ivs->cost;
5013 /* Returns true if all dependences of CP are among invariants in IVS. */
5015 static bool
5016 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5018 unsigned i;
5019 bitmap_iterator bi;
5021 if (!cp->depends_on)
5022 return true;
5024 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5026 if (ivs->n_invariant_uses[i] == 0)
5027 return false;
5030 return true;
5033 /* Creates a change that expresses USE by NEW_CP instead of OLD_CP and
5034 chains it before NEXT_CHANGE. */
5036 static struct iv_ca_delta *
5037 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5038 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5040 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5042 change->use = use;
5043 change->old_cp = old_cp;
5044 change->new_cp = new_cp;
5045 change->next_change = next_change;
5047 return change;
5050 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5051 are rewritten. */
5053 static struct iv_ca_delta *
5054 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5056 struct iv_ca_delta *last;
5058 if (!l2)
5059 return l1;
5061 if (!l1)
5062 return l2;
5064 for (last = l1; last->next_change; last = last->next_change)
5065 continue;
5066 last->next_change = l2;
5068 return l1;
5071 /* Reverse the list of changes DELTA, forming the inverse to it. */
5073 static struct iv_ca_delta *
5074 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5076 struct iv_ca_delta *act, *next, *prev = NULL;
5077 struct cost_pair *tmp;
5079 for (act = delta; act; act = next)
5081 next = act->next_change;
5082 act->next_change = prev;
5083 prev = act;
5085 tmp = act->old_cp;
5086 act->old_cp = act->new_cp;
5087 act->new_cp = tmp;
5090 return prev;
5093 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5094 reverted instead. */
5096 static void
5097 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5098 struct iv_ca_delta *delta, bool forward)
5100 struct cost_pair *from, *to;
5101 struct iv_ca_delta *act;
5103 if (!forward)
5104 delta = iv_ca_delta_reverse (delta);
5106 for (act = delta; act; act = act->next_change)
5108 from = act->old_cp;
5109 to = act->new_cp;
5110 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5111 iv_ca_set_cp (data, ivs, act->use, to);
5114 if (!forward)
5115 iv_ca_delta_reverse (delta);
5118 /* Returns true if CAND is used in IVS. */
5120 static bool
5121 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5123 return ivs->n_cand_uses[cand->id] > 0;
5126 /* Returns number of induction variable candidates in the set IVS. */
5128 static unsigned
5129 iv_ca_n_cands (struct iv_ca *ivs)
5131 return ivs->n_cands;
5134 /* Free the list of changes DELTA. */
5136 static void
5137 iv_ca_delta_free (struct iv_ca_delta **delta)
5139 struct iv_ca_delta *act, *next;
5141 for (act = *delta; act; act = next)
5143 next = act->next_change;
5144 free (act);
5147 *delta = NULL;
5150 /* Allocates a new iv candidate assignment. */
5152 static struct iv_ca *
5153 iv_ca_new (struct ivopts_data *data)
5155 struct iv_ca *nw = XNEW (struct iv_ca);
5157 nw->upto = 0;
5158 nw->bad_uses = 0;
5159 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5160 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5161 nw->cands = BITMAP_ALLOC (NULL);
5162 nw->n_cands = 0;
5163 nw->n_regs = 0;
5164 nw->cand_use_cost = zero_cost;
5165 nw->cand_cost = 0;
5166 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5167 nw->cost = zero_cost;
5169 return nw;
5172 /* Free memory occupied by the set IVS. */
5174 static void
5175 iv_ca_free (struct iv_ca **ivs)
5177 free ((*ivs)->cand_for_use);
5178 free ((*ivs)->n_cand_uses);
5179 BITMAP_FREE ((*ivs)->cands);
5180 free ((*ivs)->n_invariant_uses);
5181 free (*ivs);
5182 *ivs = NULL;
5185 /* Dumps IVS to FILE. */
5187 static void
5188 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5190 const char *pref = " invariants ";
5191 unsigned i;
5192 comp_cost cost = iv_ca_cost (ivs);
5194 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5195 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5196 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5197 bitmap_print (file, ivs->cands, " candidates: ","\n");
5199 for (i = 0; i < ivs->upto; i++)
5201 struct iv_use *use = iv_use (data, i);
5202 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5203 if (cp)
5204 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5205 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5206 else
5207 fprintf (file, " use:%d --> ??\n", use->id);
5210 for (i = 1; i <= data->max_inv_id; i++)
5211 if (ivs->n_invariant_uses[i])
5213 fprintf (file, "%s%d", pref, i);
5214 pref = ", ";
5216 fprintf (file, "\n\n");
5219 /* Try changing the candidate in IVS to CAND for each use. Return the
5220 cost of the new set, and store the differences in DELTA. The number of
5221 induction variables in the new set is stored to N_IVS. MIN_NCAND is a
5222 flag; when true, try to find a solution with a minimal number of ivs. */
5224 static comp_cost
5225 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5226 struct iv_cand *cand, struct iv_ca_delta **delta,
5227 unsigned *n_ivs, bool min_ncand)
5229 unsigned i;
5230 comp_cost cost;
5231 struct iv_use *use;
5232 struct cost_pair *old_cp, *new_cp;
5234 *delta = NULL;
5235 for (i = 0; i < ivs->upto; i++)
5237 use = iv_use (data, i);
5238 old_cp = iv_ca_cand_for_use (ivs, use);
5240 if (old_cp
5241 && old_cp->cand == cand)
5242 continue;
5244 new_cp = get_use_iv_cost (data, use, cand);
5245 if (!new_cp)
5246 continue;
5248 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5249 continue;
5251 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5252 continue;
5254 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5257 iv_ca_delta_commit (data, ivs, *delta, true);
5258 cost = iv_ca_cost (ivs);
5259 if (n_ivs)
5260 *n_ivs = iv_ca_n_cands (ivs);
5261 iv_ca_delta_commit (data, ivs, *delta, false);
5263 return cost;
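
/* The commit/measure/rollback sequence above (iv_ca_delta_commit with
   FORWARD true, iv_ca_cost, then FORWARD false) is the pattern all the
   search routines below use to evaluate a tentative change without
   copying the whole assignment.  */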
5266 /* Try narrowing set IVS by removing CAND. Return the cost of
5267 the new set and store the differences in DELTA. */
5269 static comp_cost
5270 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5271 struct iv_cand *cand, struct iv_ca_delta **delta)
5273 unsigned i, ci;
5274 struct iv_use *use;
5275 struct cost_pair *old_cp, *new_cp, *cp;
5276 bitmap_iterator bi;
5277 struct iv_cand *cnd;
5278 comp_cost cost;
5280 *delta = NULL;
5281 for (i = 0; i < n_iv_uses (data); i++)
5283 use = iv_use (data, i);
5285 old_cp = iv_ca_cand_for_use (ivs, use);
5286 if (old_cp->cand != cand)
5287 continue;
5289 new_cp = NULL;
5291 if (data->consider_all_candidates)
5293 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5295 if (ci == cand->id)
5296 continue;
5298 cnd = iv_cand (data, ci);
5300 cp = get_use_iv_cost (data, use, cnd);
5301 if (!cp)
5302 continue;
5304 if (!iv_ca_has_deps (ivs, cp))
5305 continue;
5307 if (!cheaper_cost_pair (cp, new_cp))
5308 continue;
5310 new_cp = cp;
5313 else
5315 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5317 if (ci == cand->id)
5318 continue;
5320 cnd = iv_cand (data, ci);
5322 cp = get_use_iv_cost (data, use, cnd);
5323 if (!cp)
5324 continue;
5325 if (!iv_ca_has_deps (ivs, cp))
5326 continue;
5328 if (!cheaper_cost_pair (cp, new_cp))
5329 continue;
5331 new_cp = cp;
5335 if (!new_cp)
5337 iv_ca_delta_free (delta);
5338 return infinite_cost;
5341 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5344 iv_ca_delta_commit (data, ivs, *delta, true);
5345 cost = iv_ca_cost (ivs);
5346 iv_ca_delta_commit (data, ivs, *delta, false);
5348 return cost;
5351 /* Try optimizing the set of candidates IVS by removing candidates other
5352 than EXCEPT_CAND from it. Return the cost of the new set, and store
5353 the differences in DELTA. */
5355 static comp_cost
5356 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5357 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5359 bitmap_iterator bi;
5360 struct iv_ca_delta *act_delta, *best_delta;
5361 unsigned i;
5362 comp_cost best_cost, acost;
5363 struct iv_cand *cand;
5365 best_delta = NULL;
5366 best_cost = iv_ca_cost (ivs);
5368 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5370 cand = iv_cand (data, i);
5372 if (cand == except_cand)
5373 continue;
5375 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5377 if (compare_costs (acost, best_cost) < 0)
5379 best_cost = acost;
5380 iv_ca_delta_free (&best_delta);
5381 best_delta = act_delta;
5383 else
5384 iv_ca_delta_free (&act_delta);
5387 if (!best_delta)
5389 *delta = NULL;
5390 return best_cost;
5393 /* Recurse to possibly remove other unnecessary ivs. */
5394 iv_ca_delta_commit (data, ivs, best_delta, true);
5395 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5396 iv_ca_delta_commit (data, ivs, best_delta, false);
5397 *delta = iv_ca_delta_join (best_delta, *delta);
5398 return best_cost;
5401 /* Tries to extend the set IVS in the best possible way in order
5402 to express USE. If ORIGINALP is true, prefer candidates from
5403 the original set of IVs, otherwise favor important candidates not
5404 based on any memory object. */
5406 static bool
5407 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5408 struct iv_use *use, bool originalp)
5410 comp_cost best_cost, act_cost;
5411 unsigned i;
5412 bitmap_iterator bi;
5413 struct iv_cand *cand;
5414 struct iv_ca_delta *best_delta = NULL, *act_delta;
5415 struct cost_pair *cp;
5417 iv_ca_add_use (data, ivs, use, false);
5418 best_cost = iv_ca_cost (ivs);
5420 cp = iv_ca_cand_for_use (ivs, use);
5421 if (!cp)
5423 ivs->upto--;
5424 ivs->bad_uses--;
5425 iv_ca_add_use (data, ivs, use, true);
5426 best_cost = iv_ca_cost (ivs);
5427 cp = iv_ca_cand_for_use (ivs, use);
5429 if (cp)
5431 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5432 iv_ca_set_no_cp (data, ivs, use);
5435 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5436 first try important candidates not based on any memory object. Only if
5437 this fails, try the specific ones. Rationale -- in loops with many
5438 variables the best choice often is to use just one generic biv. If we
5439 added here many ivs specific to the uses, the optimization algorithm later
5440 would be likely to get stuck in a local minimum, thus causing us to create
5441 too many ivs. The approach from few ivs to more seems more likely to be
5442 successful -- starting from few ivs, replacing an expensive use by a
5443 specific iv should always be a win. */
5444 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5446 cand = iv_cand (data, i);
5448 if (originalp && cand->pos !=IP_ORIGINAL)
5449 continue;
5451 if (!originalp && cand->iv->base_object != NULL_TREE)
5452 continue;
5454 if (iv_ca_cand_used_p (ivs, cand))
5455 continue;
5457 cp = get_use_iv_cost (data, use, cand);
5458 if (!cp)
5459 continue;
5461 iv_ca_set_cp (data, ivs, use, cp);
5462 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5463 true);
5464 iv_ca_set_no_cp (data, ivs, use);
5465 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5467 if (compare_costs (act_cost, best_cost) < 0)
5469 best_cost = act_cost;
5471 iv_ca_delta_free (&best_delta);
5472 best_delta = act_delta;
5474 else
5475 iv_ca_delta_free (&act_delta);
5478 if (infinite_cost_p (best_cost))
5480 for (i = 0; i < use->n_map_members; i++)
5482 cp = use->cost_map + i;
5483 cand = cp->cand;
5484 if (!cand)
5485 continue;
5487 /* Already tried this. */
5488 if (cand->important)
5490 if (originalp && cand->pos == IP_ORIGINAL)
5491 continue;
5492 if (!originalp && cand->iv->base_object == NULL_TREE)
5493 continue;
5496 if (iv_ca_cand_used_p (ivs, cand))
5497 continue;
5499 act_delta = NULL;
5500 iv_ca_set_cp (data, ivs, use, cp);
5501 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5502 iv_ca_set_no_cp (data, ivs, use);
5503 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5504 cp, act_delta);
5506 if (compare_costs (act_cost, best_cost) < 0)
5508 best_cost = act_cost;
5510 if (best_delta)
5511 iv_ca_delta_free (&best_delta);
5512 best_delta = act_delta;
5514 else
5515 iv_ca_delta_free (&act_delta);
5519 iv_ca_delta_commit (data, ivs, best_delta, true);
5520 iv_ca_delta_free (&best_delta);
5522 return !infinite_cost_p (best_cost);
5525 /* Finds an initial assignment of candidates to uses. */
5527 static struct iv_ca *
5528 get_initial_solution (struct ivopts_data *data, bool originalp)
5530 struct iv_ca *ivs = iv_ca_new (data);
5531 unsigned i;
5533 for (i = 0; i < n_iv_uses (data); i++)
5534 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5536 iv_ca_free (&ivs);
5537 return NULL;
5540 return ivs;
5543 /* Tries to improve the set of induction variables IVS. */
5545 static bool
5546 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5548 unsigned i, n_ivs;
5549 comp_cost acost, best_cost = iv_ca_cost (ivs);
5550 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5551 struct iv_cand *cand;
5553 /* Try extending the set of induction variables by one. */
5554 for (i = 0; i < n_iv_cands (data); i++)
5556 cand = iv_cand (data, i);
5558 if (iv_ca_cand_used_p (ivs, cand))
5559 continue;
5561 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5562 if (!act_delta)
5563 continue;
5565 /* If we successfully added the candidate and the set is small enough,
5566 try optimizing it by removing other candidates. */
5567 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5569 iv_ca_delta_commit (data, ivs, act_delta, true);
5570 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5571 iv_ca_delta_commit (data, ivs, act_delta, false);
5572 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5575 if (compare_costs (acost, best_cost) < 0)
5577 best_cost = acost;
5578 iv_ca_delta_free (&best_delta);
5579 best_delta = act_delta;
5581 else
5582 iv_ca_delta_free (&act_delta);
5585 if (!best_delta)
5587 /* Try removing the candidates from the set instead. */
5588 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5590 /* Nothing more we can do. */
5591 if (!best_delta)
5592 return false;
5595 iv_ca_delta_commit (data, ivs, best_delta, true);
5596 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5597 iv_ca_delta_free (&best_delta);
5598 return true;
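
/* In other words, each call performs one hill-climbing step: try every
   single-candidate extension (pruning the enlarged set when it is small
   enough), keep the cheapest strictly improving delta, and report
   whether any progress was made; find_optimal_iv_set_1 below simply
   iterates this until it returns false.  */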
5601 /* Attempts to find the optimal set of induction variables. We use a simple
5602 greedy heuristic -- we try to replace at most one candidate in the selected
5603 solution and remove the unused ivs as long as this improves the cost. */
5605 static struct iv_ca *
5606 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5608 struct iv_ca *set;
5610 /* Get the initial solution. */
5611 set = get_initial_solution (data, originalp);
5612 if (!set)
5614 if (dump_file && (dump_flags & TDF_DETAILS))
5615 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5616 return NULL;
5619 if (dump_file && (dump_flags & TDF_DETAILS))
5621 fprintf (dump_file, "Initial set of candidates:\n");
5622 iv_ca_dump (data, dump_file, set);
5625 while (try_improve_iv_set (data, set))
5627 if (dump_file && (dump_flags & TDF_DETAILS))
5629 fprintf (dump_file, "Improved to:\n");
5630 iv_ca_dump (data, dump_file, set);
5634 return set;
5637 static struct iv_ca *
5638 find_optimal_iv_set (struct ivopts_data *data)
5640 unsigned i;
5641 struct iv_ca *set, *origset;
5642 struct iv_use *use;
5643 comp_cost cost, origcost;
5645 /* Determine the cost based on a strategy that starts with the original IVs,
5646 then try again using a strategy that prefers candidates not based
5647 on any IVs. */
5648 origset = find_optimal_iv_set_1 (data, true);
5649 set = find_optimal_iv_set_1 (data, false);
5651 if (!origset && !set)
5652 return NULL;
5654 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5655 cost = set ? iv_ca_cost (set) : infinite_cost;
5657 if (dump_file && (dump_flags & TDF_DETAILS))
5659 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5660 origcost.cost, origcost.complexity);
5661 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5662 cost.cost, cost.complexity);
5665 /* Choose the one with the best cost. */
5666 if (compare_costs (origcost, cost) <= 0)
5668 if (set)
5669 iv_ca_free (&set);
5670 set = origset;
5672 else if (origset)
5673 iv_ca_free (&origset);
5675 for (i = 0; i < n_iv_uses (data); i++)
5677 use = iv_use (data, i);
5678 use->selected = iv_ca_cand_for_use (set, use)->cand;
5681 return set;
5684 /* Creates a new induction variable corresponding to CAND. */
5686 static void
5687 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5689 gimple_stmt_iterator incr_pos;
5690 tree base;
5691 bool after = false;
5693 if (!cand->iv)
5694 return;
5696 switch (cand->pos)
5698 case IP_NORMAL:
5699 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5700 break;
5702 case IP_END:
5703 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5704 after = true;
5705 break;
5707 case IP_AFTER_USE:
5708 after = true;
5709 /* fall through */
5710 case IP_BEFORE_USE:
5711 incr_pos = gsi_for_stmt (cand->incremented_at);
5712 break;
5714 case IP_ORIGINAL:
5715 /* Mark that the iv is preserved. */
5716 name_info (data, cand->var_before)->preserve_biv = true;
5717 name_info (data, cand->var_after)->preserve_biv = true;
5719 /* Rewrite the increment so that it uses var_before directly. */
5720 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5721 return;
5724 gimple_add_tmp_var (cand->var_before);
5725 add_referenced_var (cand->var_before);
5727 base = unshare_expr (cand->iv->base);
5729 create_iv (base, unshare_expr (cand->iv->step),
5730 cand->var_before, data->current_loop,
5731 &incr_pos, after, &cand->var_before, &cand->var_after);
5734 /* Creates new induction variables described in SET. */
5736 static void
5737 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5739 unsigned i;
5740 struct iv_cand *cand;
5741 bitmap_iterator bi;
5743 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5745 cand = iv_cand (data, i);
5746 create_new_iv (data, cand);
5749 if (dump_file && (dump_flags & TDF_DETAILS))
5751 fprintf (dump_file, "\nSelected IV set: \n");
5752 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5754 cand = iv_cand (data, i);
5755 dump_cand (dump_file, cand);
5757 fprintf (dump_file, "\n");
5761 /* Rewrites USE (definition of iv used in a nonlinear expression)
5762 using candidate CAND. */
5764 static void
5765 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5766 struct iv_use *use, struct iv_cand *cand)
5768 tree comp;
5769 tree op, tgt;
5770 gimple ass;
5771 gimple_stmt_iterator bsi;
5773 /* An important special case -- if we are asked to express the value of
5774 the original iv by itself, just exit; there is no need to
5775 introduce a new computation (that might also need casting the
5776 variable to unsigned and back). */
5777 if (cand->pos == IP_ORIGINAL
5778 && cand->incremented_at == use->stmt)
5780 tree step, ctype, utype;
5781 enum tree_code incr_code = PLUS_EXPR, old_code;
5783 gcc_assert (is_gimple_assign (use->stmt));
5784 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5786 step = cand->iv->step;
5787 ctype = TREE_TYPE (step);
5788 utype = TREE_TYPE (cand->var_after);
5789 if (TREE_CODE (step) == NEGATE_EXPR)
5791 incr_code = MINUS_EXPR;
5792 step = TREE_OPERAND (step, 0);
5795 /* Check whether we may leave the computation unchanged.
5796 This is the case only if it does not rely on other
5797 computations in the loop -- otherwise, the computation
5798 we rely upon may be removed in remove_unused_ivs,
5799 thus leading to ICE. */
5800 old_code = gimple_assign_rhs_code (use->stmt);
5801 if (old_code == PLUS_EXPR
5802 || old_code == MINUS_EXPR
5803 || old_code == POINTER_PLUS_EXPR)
5805 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5806 op = gimple_assign_rhs2 (use->stmt);
5807 else if (old_code != MINUS_EXPR
5808 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5809 op = gimple_assign_rhs1 (use->stmt);
5810 else
5811 op = NULL_TREE;
5813 else
5814 op = NULL_TREE;
5816 if (op
5817 && (TREE_CODE (op) == INTEGER_CST
5818 || operand_equal_p (op, step, 0)))
5819 return;
5821 /* Otherwise, add the necessary computations to express
5822 the iv. */
5823 op = fold_convert (ctype, cand->var_before);
5824 comp = fold_convert (utype,
5825 build2 (incr_code, ctype, op,
5826 unshare_expr (step)));
5828 else
5830 comp = get_computation (data->current_loop, use, cand);
5831 gcc_assert (comp != NULL_TREE);
5834 switch (gimple_code (use->stmt))
5836 case GIMPLE_PHI:
5837 tgt = PHI_RESULT (use->stmt);
5839 /* If we should keep the biv, do not replace it. */
5840 if (name_info (data, tgt)->preserve_biv)
5841 return;
5843 bsi = gsi_after_labels (gimple_bb (use->stmt));
5844 break;
5846 case GIMPLE_ASSIGN:
5847 tgt = gimple_assign_lhs (use->stmt);
5848 bsi = gsi_for_stmt (use->stmt);
5849 break;
5851 default:
5852 gcc_unreachable ();
5855 if (!valid_gimple_rhs_p (comp)
5856 || (gimple_code (use->stmt) != GIMPLE_PHI
5857 /* We can't allow re-allocating the stmt as it might be pointed
5858 to still. */
5859 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
5860 >= gimple_num_ops (gsi_stmt (bsi)))))
5862 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
5863 true, GSI_SAME_STMT);
5864 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
5865 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
5868 if (gimple_code (use->stmt) == GIMPLE_PHI)
5870 ass = gimple_build_assign (tgt, comp);
5871 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5873 bsi = gsi_for_stmt (use->stmt);
5874 remove_phi_node (&bsi, false);
5876 else
5878 gimple_assign_set_rhs_from_tree (&bsi, comp);
5879 use->stmt = gsi_stmt (bsi);
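
/* Example of the IP_ORIGINAL special case above: for a preserved biv
   increment such as "i_7 = i_3 + 1", expressing the value "by itself"
   leaves the statement alone whenever the other operand is already the
   constant step, instead of emitting the equivalent computation through
   an unsigned type and casting back.  */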
5883 /* Replaces the ssa name at index IDX by its basic variable. Callback
5884 for for_each_index. */
5886 static bool
5887 idx_remove_ssa_names (tree base, tree *idx,
5888 void *data ATTRIBUTE_UNUSED)
5890 tree *op;
5892 if (TREE_CODE (*idx) == SSA_NAME)
5893 *idx = SSA_NAME_VAR (*idx);
5895 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
5897 op = &TREE_OPERAND (base, 2);
5898 if (*op
5899 && TREE_CODE (*op) == SSA_NAME)
5900 *op = SSA_NAME_VAR (*op);
5901 op = &TREE_OPERAND (base, 3);
5902 if (*op
5903 && TREE_CODE (*op) == SSA_NAME)
5904 *op = SSA_NAME_VAR (*op);
5907 return true;
5910 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5912 static tree
5913 unshare_and_remove_ssa_names (tree ref)
5915 ref = unshare_expr (ref);
5916 for_each_index (&ref, idx_remove_ssa_names, NULL);
5918 return ref;
5921 /* Copies the reference information from OLD_REF to NEW_REF. */
5923 static void
5924 copy_ref_info (tree new_ref, tree old_ref)
5926 tree new_ptr_base = NULL_TREE;
5928 if (TREE_CODE (old_ref) == TARGET_MEM_REF
5929 && TREE_CODE (new_ref) == TARGET_MEM_REF)
5930 TMR_ORIGINAL (new_ref) = TMR_ORIGINAL (old_ref);
5931 else if (TREE_CODE (new_ref) == TARGET_MEM_REF)
5932 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5934 TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
5935 TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
5937 if (TREE_CODE (new_ref) == TARGET_MEM_REF)
5938 new_ptr_base = TMR_BASE (new_ref);
5939 else if (TREE_CODE (new_ref) == MEM_REF)
5940 new_ptr_base = TREE_OPERAND (new_ref, 0);
5942 /* We can transfer points-to information from an old pointer
5943 or decl base to the new one. */
5944 if (new_ptr_base
5945 && TREE_CODE (new_ptr_base) == SSA_NAME
5946 && POINTER_TYPE_P (TREE_TYPE (new_ptr_base))
5947 && !SSA_NAME_PTR_INFO (new_ptr_base))
5949 tree base = get_base_address (old_ref);
5950 if (!base)
5952 else if ((INDIRECT_REF_P (base)
5953 || TREE_CODE (base) == MEM_REF)
5954 && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME)
5955 duplicate_ssa_name_ptr_info
5956 (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
5957 else if (TREE_CODE (base) == VAR_DECL
5958 || TREE_CODE (base) == PARM_DECL
5959 || TREE_CODE (base) == RESULT_DECL)
5961 struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
5962 pt_solution_set_var (&pi->pt, base);
5967 /* Performs a peephole optimization to reorder the iv update statement with
5968 a mem ref to enable instruction combining in later phases. The mem ref uses
5969 the iv value before the update, so the reordering transformation requires
5970 adjustment of the offset. CAND is the selected IV_CAND.
5972 Example:
5974 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
5975 iv2 = iv1 + 1;
5977 if (t < val) (1)
5978 goto L;
5979 goto Head;
5982 Directly propagating t over to (1) will introduce an overlapping live
5983 range and thus increase register pressure. This peephole transforms it into:
5986 iv2 = iv1 + 1;
5987 t = MEM_REF (base, iv2, 8, 8);
5988 if (t < val)
5989 goto L;
5990 goto Head;
5993 static void
5994 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
5996 tree var_after;
5997 gimple iv_update, stmt;
5998 basic_block bb;
5999 gimple_stmt_iterator gsi, gsi_iv;
6001 if (cand->pos != IP_NORMAL)
6002 return;
6004 var_after = cand->var_after;
6005 iv_update = SSA_NAME_DEF_STMT (var_after);
6007 bb = gimple_bb (iv_update);
6008 gsi = gsi_last_nondebug_bb (bb);
6009 stmt = gsi_stmt (gsi);
6011 /* Only handle a conditional statement for now. */
6012 if (gimple_code (stmt) != GIMPLE_COND)
6013 return;
6015 gsi_prev_nondebug (&gsi);
6016 stmt = gsi_stmt (gsi);
6017 if (stmt != iv_update)
6018 return;
6020 gsi_prev_nondebug (&gsi);
6021 if (gsi_end_p (gsi))
6022 return;
6024 stmt = gsi_stmt (gsi);
6025 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6026 return;
6028 if (stmt != use->stmt)
6029 return;
6031 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6032 return;
6034 if (dump_file && (dump_flags & TDF_DETAILS))
6036 fprintf (dump_file, "Reordering \n");
6037 print_gimple_stmt (dump_file, iv_update, 0, 0);
6038 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6039 fprintf (dump_file, "\n");
6042 gsi = gsi_for_stmt (use->stmt);
6043 gsi_iv = gsi_for_stmt (iv_update);
6044 gsi_move_before (&gsi_iv, &gsi);
6046 cand->pos = IP_BEFORE_USE;
6047 cand->incremented_at = use->stmt;
6050 /* Rewrites USE (address that is an iv) using candidate CAND. */
6052 static void
6053 rewrite_use_address (struct ivopts_data *data,
6054 struct iv_use *use, struct iv_cand *cand)
6056 aff_tree aff;
6057 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6058 tree base_hint = NULL_TREE;
6059 tree ref, iv;
6060 bool ok;
6062 adjust_iv_update_pos (cand, use);
6063 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6064 gcc_assert (ok);
6065 unshare_aff_combination (&aff);
6067 /* To avoid undefined overflow problems, all IV candidates use unsigned
6068 integer types. The drawback is that this makes it impossible for
6069 create_mem_ref to distinguish an IV that is based on a memory object
6070 from one that represents simply an offset.
6072 To work around this problem, we pass a hint to create_mem_ref that
6073 indicates which variable (if any) in aff is an IV based on a memory
6074 object. Note that we only consider the candidate. If this is not
6075 based on an object, the base of the reference is in some subexpression
6076 of the use -- but these will use pointer types, so they are recognized
6077 by the create_mem_ref heuristics anyway. */
6078 if (cand->iv->base_object)
6079 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6081 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6082 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6083 reference_alias_ptr_type (*use->op_p),
6084 iv, base_hint, data->speed);
6085 copy_ref_info (ref, *use->op_p);
6086 *use->op_p = ref;
6089 /* Rewrites USE (a condition in which one of the arguments is an iv) using
6090 candidate CAND. */
6092 static void
6093 rewrite_use_compare (struct ivopts_data *data,
6094 struct iv_use *use, struct iv_cand *cand)
6096 tree comp, *var_p, op, bound;
6097 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6098 enum tree_code compare;
6099 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6100 bool ok;
6102 bound = cp->value;
6103 if (bound)
6105 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6106 tree var_type = TREE_TYPE (var);
6107 gimple_seq stmts;
6109 if (dump_file && (dump_flags & TDF_DETAILS))
6111 fprintf (dump_file, "Replacing exit test: ");
6112 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6114 compare = iv_elimination_compare (data, use);
6115 bound = unshare_expr (fold_convert (var_type, bound));
6116 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6117 if (stmts)
6118 gsi_insert_seq_on_edge_immediate (
6119 loop_preheader_edge (data->current_loop),
6120 stmts);
6122 gimple_cond_set_lhs (use->stmt, var);
6123 gimple_cond_set_code (use->stmt, compare);
6124 gimple_cond_set_rhs (use->stmt, op);
6125 return;
6128 /* The induction variable elimination failed; just express the original
6129 giv. */
6130 comp = get_computation (data->current_loop, use, cand);
6131 gcc_assert (comp != NULL_TREE);
6133 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6134 gcc_assert (ok);
6136 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6137 true, GSI_SAME_STMT);
6140 /* Rewrites USE using candidate CAND. */
6142 static void
6143 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6145 switch (use->type)
6147 case USE_NONLINEAR_EXPR:
6148 rewrite_use_nonlinear_expr (data, use, cand);
6149 break;
6151 case USE_ADDRESS:
6152 rewrite_use_address (data, use, cand);
6153 break;
6155 case USE_COMPARE:
6156 rewrite_use_compare (data, use, cand);
6157 break;
6159 default:
6160 gcc_unreachable ();
6163 update_stmt (use->stmt);
6166 /* Rewrite the uses using the selected induction variables. */
6168 static void
6169 rewrite_uses (struct ivopts_data *data)
6171 unsigned i;
6172 struct iv_cand *cand;
6173 struct iv_use *use;
6175 for (i = 0; i < n_iv_uses (data); i++)
6177 use = iv_use (data, i);
6178 cand = use->selected;
6179 gcc_assert (cand);
6181 rewrite_use (data, use, cand);
6185 /* Removes the ivs that are not used after rewriting. */
6187 static void
6188 remove_unused_ivs (struct ivopts_data *data)
6190 unsigned j;
6191 bitmap_iterator bi;
6192 bitmap toremove = BITMAP_ALLOC (NULL);
6194 /* Figure out an order in which to release SSA DEFs so that we don't
6195 release something that we'd have to propagate into a debug stmt
6196 afterwards. */
6197 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6199 struct version_info *info;
6201 info = ver_info (data, j);
6202 if (info->iv
6203 && !integer_zerop (info->iv->step)
6204 && !info->inv_id
6205 && !info->iv->have_use_for
6206 && !info->preserve_biv)
6207 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6210 release_defs_bitset (toremove);
6212 BITMAP_FREE (toremove);
6215 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6216 for pointer_map_traverse. */
6218 static bool
6219 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6220 void *data ATTRIBUTE_UNUSED)
6222 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6224 free (niter);
6225 return true;
6228 /* Frees data allocated by the optimization of a single loop. */
6230 static void
6231 free_loop_data (struct ivopts_data *data)
6233 unsigned i, j;
6234 bitmap_iterator bi;
6235 tree obj;
6237 if (data->niters)
6239 pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6240 pointer_map_destroy (data->niters);
6241 data->niters = NULL;
6244 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6246 struct version_info *info;
6248 info = ver_info (data, i);
6249 if (info->iv)
6250 free (info->iv);
6251 info->iv = NULL;
6252 info->has_nonlin_use = false;
6253 info->preserve_biv = false;
6254 info->inv_id = 0;
6256 bitmap_clear (data->relevant);
6257 bitmap_clear (data->important_candidates);
6259 for (i = 0; i < n_iv_uses (data); i++)
6261 struct iv_use *use = iv_use (data, i);
6263 free (use->iv);
6264 BITMAP_FREE (use->related_cands);
6265 for (j = 0; j < use->n_map_members; j++)
6266 if (use->cost_map[j].depends_on)
6267 BITMAP_FREE (use->cost_map[j].depends_on);
6268 free (use->cost_map);
6269 free (use);
6271 VEC_truncate (iv_use_p, data->iv_uses, 0);
6273 for (i = 0; i < n_iv_cands (data); i++)
6275 struct iv_cand *cand = iv_cand (data, i);
6277 if (cand->iv)
6278 free (cand->iv);
6279 if (cand->depends_on)
6280 BITMAP_FREE (cand->depends_on);
6281 free (cand);
6283 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
6285 if (data->version_info_size < num_ssa_names)
6287 data->version_info_size = 2 * num_ssa_names;
6288 free (data->version_info);
6289 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6292 data->max_inv_id = 0;
6294 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
6295 SET_DECL_RTL (obj, NULL_RTX);
6297 VEC_truncate (tree, decl_rtl_to_reset, 0);
6299 htab_empty (data->inv_expr_tab);
6300 data->inv_expr_id = 0;
6303 /* Finalizes the data structures used by the iv optimization pass. */
6306 static void
6307 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6309 free_loop_data (data);
6310 free (data->version_info);
6311 BITMAP_FREE (data->relevant);
6312 BITMAP_FREE (data->important_candidates);
6314 VEC_free (tree, heap, decl_rtl_to_reset);
6315 VEC_free (iv_use_p, heap, data->iv_uses);
6316 VEC_free (iv_cand_p, heap, data->iv_candidates);
6317 htab_delete (data->inv_expr_tab);
6320 /* Returns true if the loop body BODY includes any function calls. */
6322 static bool
6323 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6325 gimple_stmt_iterator gsi;
6326 unsigned i;
6328 for (i = 0; i < num_nodes; i++)
6329 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6331 gimple stmt = gsi_stmt (gsi);
6332 if (is_gimple_call (stmt)
6333 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6334 return true;
6336 return false;
6339 /* Optimizes the LOOP. Returns true if anything changed. */
6341 static bool
6342 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6344 bool changed = false;
6345 struct iv_ca *iv_ca;
6346 edge exit;
6347 basic_block *body;
6349 gcc_assert (!data->niters);
6350 data->current_loop = loop;
6351 data->speed = optimize_loop_for_speed_p (loop);
6353 if (dump_file && (dump_flags & TDF_DETAILS))
6355 fprintf (dump_file, "Processing loop %d\n", loop->num);
6357 exit = single_dom_exit (loop);
6358 if (exit)
6360 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6361 exit->src->index, exit->dest->index);
6362 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6363 fprintf (dump_file, "\n");
6366 fprintf (dump_file, "\n");
6369 body = get_loop_body (loop);
6370 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6371 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6372 free (body);
6374 /* For each ssa name, determine whether it behaves as an induction variable
6375 in some loop. */
6376 if (!find_induction_variables (data))
6377 goto finish;
6379 /* Finds interesting uses (item 1). */
6380 find_interesting_uses (data);
6381 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6382 goto finish;
6384 /* Finds candidates for the induction variables (item 2). */
6385 find_iv_candidates (data);
6387 /* Calculates the costs (item 3, part 1). */
6388 determine_iv_costs (data);
6389 determine_use_iv_costs (data);
6390 determine_set_costs (data);
6392 /* Find the optimal set of induction variables (item 3, part 2). */
6393 iv_ca = find_optimal_iv_set (data);
6394 if (!iv_ca)
6395 goto finish;
6396 changed = true;
6398 /* Create the new induction variables (item 4, part 1). */
6399 create_new_ivs (data, iv_ca);
6400 iv_ca_free (&iv_ca);
6402 /* Rewrite the uses (item 4, part 2). */
6403 rewrite_uses (data);
6405 /* Remove the ivs that are unused after rewriting. */
6406 remove_unused_ivs (data);
6408 /* We have changed the structure of induction variables; it might happen
6409 that definitions in the scev database refer to some of them that were
6410 eliminated. */
6411 scev_reset ();
6413 finish:
6414 free_loop_data (data);
6416 return changed;
6419 /* Main entry point. Optimizes induction variables in loops. */
6421 void
6422 tree_ssa_iv_optimize (void)
6424 struct loop *loop;
6425 struct ivopts_data data;
6426 loop_iterator li;
6428 tree_ssa_iv_optimize_init (&data);
6430 /* Optimize the loops starting with the innermost ones. */
6431 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6433 if (dump_file && (dump_flags & TDF_DETAILS))
6434 flow_loop_dump (loop, dump_file, NULL, 1);
6436 tree_ssa_iv_optimize_loop (&data, loop);
6439 tree_ssa_iv_optimize_finalize (&data);