/* Induction variable optimizations.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */

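/* As an illustration (a sketch of the intended effect, not code taken
   from this pass), strength reduction may rewrite a loop such as

       for (i = 0; i < n; i++)
         a[i] = 0;

   into the equivalent of

       for (p = &a[0]; p != &a[n]; p++)
         *p = 0;

   so that the multiplication hidden in the address computation
   "a + i * sizeof (*a)" becomes a simple pointer increment.  */
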
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs wrt the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopts.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopts.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
         names that appear in phi nodes on abnormal edges, so that we do not
         create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *)ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *)ent2;

  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *)ent;
  return expr->hash;
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
				    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}

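/* For example (an illustrative note, not part of the original source):
   for EXPR == &a[i].x the base address of the access is "a", so the
   result is the address of "a" converted to ptr_type_node; for a plain
   pointer ssa name "p" the default case returns "p" itself; and for an
   integer constant cast to a pointer the result is NULL_TREE.  */
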
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

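/* For instance (an illustrative note, not part of the original source):
   for an exit test "i_1 < n_2" where i_1 is an iv with nonzero step and
   n_2 is loop invariant, *CONTROL_VAR is set to point to i_1 and *BOUND
   to n_2, and true is returned.  For "n_2 > i_1" the operands are first
   swapped, since the control variable may appear on either side.  */
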
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
	return false;
    }

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = (res * tree_to_double_int (mby)).sext (precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = (p0 + p1).sext (precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = tree_to_double_int (top).sext (precision);
      p1 = tree_to_double_int (bot).sext (precision);
      if (p1.is_zero ())
	return false;
      *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
      return res.is_zero ();

    default:
      return false;
    }
}

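/* A few illustrative cases (a sketch, not part of the original source):
   for TOP = 4 * n and BOT = n the MULT_EXPR arm gives *MUL = 4; for
   TOP = n + n the PLUS_EXPR arm gives *MUL = 2; for the constants
   TOP = 12 and BOT = 4 the INTEGER_CST arm gives *MUL = 3.  TOP = 4 * n + 1
   with BOT = n fails, since the remainder 1 is not a multiple of n.  */
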
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ???  Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}

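/* As an illustration (not part of the original source): for an int
   array "a", strip_offset on &a[4] stores 4 * sizeof (int) to *OFFSET
   and returns the address of a[0]; for "p + 8" it stores 8 and returns
   "p".  This lets the constant part of an address be folded into the
   addressing mode separately from the variable part.  */
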
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}

/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to which we should store it.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}

/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  /* For non-original variables, make sure their values are computed in a type
     that does not invoke undefined behavior on overflows (since in general,
     we cannot prove that these induction variables are non-wrapping).  */
  if (pos != IP_ORIGINAL)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (base))
	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
	break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      data->iv_candidates.safe_push (cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}

2304 /* Returns true if incrementing the induction variable at the end of the LOOP
2305 is allowed.
2307 The purpose is to avoid splitting the latch edge with a biv increment, thus
2308 creating a jump, possibly confusing other optimization passes and leaving
2309 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2310 is not available (so we do not have a better alternative), or if the latch
2311 edge is already nonempty. */
2313 static bool
2314 allow_ip_end_pos_p (struct loop *loop)
2316 if (!ip_normal_pos (loop))
2317 return true;
2319 if (!empty_block_p (ip_end_pos (loop)))
2320 return true;
2322 return false;
2325 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2326 Important field is set to IMPORTANT. */
2328 static void
2329 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2330 bool important, struct iv_use *use)
2332 basic_block use_bb = gimple_bb (use->stmt);
2333 enum machine_mode mem_mode;
2334 unsigned HOST_WIDE_INT cstepi;
2336 /* If we insert the increment in any position other than the standard
2337 ones, we must ensure that it is incremented once per iteration.
2338 It must not be in an inner nested loop, or one side of an if
2339 statement. */
2340 if (use_bb->loop_father != data->current_loop
2341 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2342 || stmt_could_throw_p (use->stmt)
2343 || !cst_and_fits_in_hwi (step))
2344 return;
2346 cstepi = int_cst_value (step);
2348 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2349 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2350 || USE_STORE_PRE_INCREMENT (mem_mode))
2351 && GET_MODE_SIZE (mem_mode) == cstepi)
2352 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2353 || USE_STORE_PRE_DECREMENT (mem_mode))
2354 && GET_MODE_SIZE (mem_mode) == -cstepi))
2356 enum tree_code code = MINUS_EXPR;
2357 tree new_base;
2358 tree new_step = step;
2360 if (POINTER_TYPE_P (TREE_TYPE (base)))
2362 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2363 code = POINTER_PLUS_EXPR;
2365 else
2366 new_step = fold_convert (TREE_TYPE (base), new_step);
2367 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2368 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2369 use->stmt);
2371 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2372 || USE_STORE_POST_INCREMENT (mem_mode))
2373 && GET_MODE_SIZE (mem_mode) == cstepi)
2374 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2375 || USE_STORE_POST_DECREMENT (mem_mode))
2376 && GET_MODE_SIZE (mem_mode) == -cstepi))
2378 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2379 use->stmt);
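/* For illustration, the kind of source loop these candidates target,
   assuming a target with post-increment loads and 4-byte ints (p and n
   are hypothetical):

     while (n--)
       sum += *p++;

   Here cstepi == GET_MODE_SIZE (SImode) == 4, so the IP_AFTER_USE
   candidate lets the load use a POST_INC address and absorb the iv
   increment into the memory access.  */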
2383 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2384 position to POS. If USE is not NULL, the candidate is set as related to
2385 it. The candidate computation is scheduled on all available positions. */
2387 static void
2388 add_candidate (struct ivopts_data *data,
2389 tree base, tree step, bool important, struct iv_use *use)
2391 if (ip_normal_pos (data->current_loop))
2392 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2393 if (ip_end_pos (data->current_loop)
2394 && allow_ip_end_pos_p (data->current_loop))
2395 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2397 if (use != NULL && use->type == USE_ADDRESS)
2398 add_autoinc_candidates (data, base, step, important, use);
2401 /* Adds standard iv candidates. */
2403 static void
2404 add_standard_iv_candidates (struct ivopts_data *data)
2406 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2408 /* The same for the "long" type if it is still fast enough. */
2409 if (TYPE_PRECISION
2410 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2411 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2412 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2413 build_int_cst (long_integer_type_node, 1), true, NULL);
2415 /* The same for the "long long" type if it is still fast enough. */
2416 if (TYPE_PRECISION
2417 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2418 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2419 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2420 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2424 /* Adds candidates based on the old induction variable IV. */
2426 static void
2427 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2429 gimple phi;
2430 tree def;
2431 struct iv_cand *cand;
2433 add_candidate (data, iv->base, iv->step, true, NULL);
2435 /* The same, but with initial value zero. */
2436 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2437 add_candidate (data, size_int (0), iv->step, true, NULL);
2438 else
2439 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2440 iv->step, true, NULL);
2442 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2443 if (gimple_code (phi) == GIMPLE_PHI)
2445 /* Additionally record the possibility of leaving the original iv
2446 untouched. */
2447 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2448 cand = add_candidate_1 (data,
2449 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2450 SSA_NAME_DEF_STMT (def));
2451 cand->var_before = iv->ssa_name;
2452 cand->var_after = def;
2456 /* Adds candidates based on the old induction variables. */
2458 static void
2459 add_old_ivs_candidates (struct ivopts_data *data)
2461 unsigned i;
2462 struct iv *iv;
2463 bitmap_iterator bi;
2465 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2467 iv = ver_info (data, i)->iv;
2468 if (iv && iv->biv_p && !integer_zerop (iv->step))
2469 add_old_iv_candidates (data, iv);
2473 /* Adds candidates based on the value of the induction variable IV and USE. */
2475 static void
2476 add_iv_value_candidates (struct ivopts_data *data,
2477 struct iv *iv, struct iv_use *use)
2479 unsigned HOST_WIDE_INT offset;
2480 tree base;
2481 tree basetype;
2483 add_candidate (data, iv->base, iv->step, false, use);
2485 /* The same, but with initial value zero. Make such variable important,
2486 since it is generic enough so that possibly many uses may be based
2487 on it. */
2488 basetype = TREE_TYPE (iv->base);
2489 if (POINTER_TYPE_P (basetype))
2490 basetype = sizetype;
2491 add_candidate (data, build_int_cst (basetype, 0),
2492 iv->step, true, use);
2494 /* Third, try removing the constant offset. Make sure to even
2495 add a candidate for &a[0] vs. (T *)&a. */
2496 base = strip_offset (iv->base, &offset);
2497 if (offset
2498 || base != iv->base)
2499 add_candidate (data, base, iv->step, false, use);
2502 /* Adds candidates based on the uses. */
2504 static void
2505 add_derived_ivs_candidates (struct ivopts_data *data)
2507 unsigned i;
2509 for (i = 0; i < n_iv_uses (data); i++)
2511 struct iv_use *use = iv_use (data, i);
2513 if (!use)
2514 continue;
2516 switch (use->type)
2518 case USE_NONLINEAR_EXPR:
2519 case USE_COMPARE:
2520 case USE_ADDRESS:
2521 /* Just add the ivs based on the value of the iv used here. */
2522 add_iv_value_candidates (data, use->iv, use);
2523 break;
2525 default:
2526 gcc_unreachable ();
2531 /* Record important candidates and add them to related_cands bitmaps
2532 if needed. */
2534 static void
2535 record_important_candidates (struct ivopts_data *data)
2537 unsigned i;
2538 struct iv_use *use;
2540 for (i = 0; i < n_iv_cands (data); i++)
2542 struct iv_cand *cand = iv_cand (data, i);
2544 if (cand->important)
2545 bitmap_set_bit (data->important_candidates, i);
2548 data->consider_all_candidates = (n_iv_cands (data)
2549 <= CONSIDER_ALL_CANDIDATES_BOUND);
2551 if (data->consider_all_candidates)
2553 /* We will not need "related_cands" bitmaps in this case,
2554 so release them to decrease peak memory consumption. */
2555 for (i = 0; i < n_iv_uses (data); i++)
2557 use = iv_use (data, i);
2558 BITMAP_FREE (use->related_cands);
2561 else
2563 /* Add important candidates to the related_cands bitmaps. */
2564 for (i = 0; i < n_iv_uses (data); i++)
2565 bitmap_ior_into (iv_use (data, i)->related_cands,
2566 data->important_candidates);
2570 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2571 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2572 we allocate a simple list to every use. */
2574 static void
2575 alloc_use_cost_map (struct ivopts_data *data)
2577 unsigned i, size, s, j;
2579 for (i = 0; i < n_iv_uses (data); i++)
2581 struct iv_use *use = iv_use (data, i);
2582 bitmap_iterator bi;
2584 if (data->consider_all_candidates)
2585 size = n_iv_cands (data);
2586 else
2588 s = 0;
2589 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2591 s++;
2594 /* Round up to a power of two, so that taking the modulo by it is fast. */
2595 for (size = 1; size < s; size <<= 1)
2596 continue;
2599 use->n_map_members = size;
2600 use->cost_map = XCNEWVEC (struct cost_pair, size);
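/* For illustration, a minimal standalone sketch of the sizing trick above:
   rounding the table size up to a power of two lets set_use_iv_cost and
   get_use_iv_cost reduce a candidate id with a cheap mask instead of a
   division.  */
#include <stdio.h>

static unsigned
round_up_pow2 (unsigned s)
{
  unsigned size;
  for (size = 1; size < s; size <<= 1)
    continue;
  return size;
}

int
main (void)
{
  unsigned size = round_up_pow2 (5);	/* yields 8 */
  unsigned id = 29;
  /* For a power-of-two SIZE, id & (size - 1) equals id % size.  */
  printf ("%u %u\n", id & (size - 1), id % size);	/* prints "5 5" */
  return 0;
}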
2604 /* Returns description of computation cost of expression whose runtime
2605 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2607 static comp_cost
2608 new_cost (unsigned runtime, unsigned complexity)
2610 comp_cost cost;
2612 cost.cost = runtime;
2613 cost.complexity = complexity;
2615 return cost;
2618 /* Adds costs COST1 and COST2. */
2620 static comp_cost
2621 add_costs (comp_cost cost1, comp_cost cost2)
2623 cost1.cost += cost2.cost;
2624 cost1.complexity += cost2.complexity;
2626 return cost1;
2628 /* Subtracts costs COST1 and COST2. */
2630 static comp_cost
2631 sub_costs (comp_cost cost1, comp_cost cost2)
2633 cost1.cost -= cost2.cost;
2634 cost1.complexity -= cost2.complexity;
2636 return cost1;
2639 /* Returns a negative number if COST1 < COST2, a positive number if
2640 COST1 > COST2, and 0 if COST1 = COST2. */
2642 static int
2643 compare_costs (comp_cost cost1, comp_cost cost2)
2645 if (cost1.cost == cost2.cost)
2646 return cost1.complexity - cost2.complexity;
2648 return cost1.cost - cost2.cost;
2651 /* Returns true if COST is infinite. */
2653 static bool
2654 infinite_cost_p (comp_cost cost)
2656 return cost.cost == INFTY;
2659 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it depends
2660 on invariants DEPENDS_ON and that the value used in expressing it
2661 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2663 static void
2664 set_use_iv_cost (struct ivopts_data *data,
2665 struct iv_use *use, struct iv_cand *cand,
2666 comp_cost cost, bitmap depends_on, tree value,
2667 enum tree_code comp, int inv_expr_id)
2669 unsigned i, s;
2671 if (infinite_cost_p (cost))
2673 BITMAP_FREE (depends_on);
2674 return;
2677 if (data->consider_all_candidates)
2679 use->cost_map[cand->id].cand = cand;
2680 use->cost_map[cand->id].cost = cost;
2681 use->cost_map[cand->id].depends_on = depends_on;
2682 use->cost_map[cand->id].value = value;
2683 use->cost_map[cand->id].comp = comp;
2684 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2685 return;
2688 /* n_map_members is a power of two, so this computes modulo. */
2689 s = cand->id & (use->n_map_members - 1);
2690 for (i = s; i < use->n_map_members; i++)
2691 if (!use->cost_map[i].cand)
2692 goto found;
2693 for (i = 0; i < s; i++)
2694 if (!use->cost_map[i].cand)
2695 goto found;
2697 gcc_unreachable ();
2699 found:
2700 use->cost_map[i].cand = cand;
2701 use->cost_map[i].cost = cost;
2702 use->cost_map[i].depends_on = depends_on;
2703 use->cost_map[i].value = value;
2704 use->cost_map[i].comp = comp;
2705 use->cost_map[i].inv_expr_id = inv_expr_id;
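/* For illustration: the insertion above is open addressing with linear
   probing.  E.g. with n_map_members == 8, a candidate with id 13 starts at
   slot 13 & 7 == 5; if slot 5 is occupied, slots 6 and 7 are tried, then
   0..4.  get_use_iv_cost below probes in the same order, so lookups find
   exactly what was inserted.  */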
2708 /* Gets cost of (USE, CANDIDATE) pair. */
2710 static struct cost_pair *
2711 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2712 struct iv_cand *cand)
2714 unsigned i, s;
2715 struct cost_pair *ret;
2717 if (!cand)
2718 return NULL;
2720 if (data->consider_all_candidates)
2722 ret = use->cost_map + cand->id;
2723 if (!ret->cand)
2724 return NULL;
2726 return ret;
2729 /* n_map_members is a power of two, so this computes modulo. */
2730 s = cand->id & (use->n_map_members - 1);
2731 for (i = s; i < use->n_map_members; i++)
2732 if (use->cost_map[i].cand == cand)
2733 return use->cost_map + i;
2735 for (i = 0; i < s; i++)
2736 if (use->cost_map[i].cand == cand)
2737 return use->cost_map + i;
2739 return NULL;
2742 /* Returns estimate on cost of computing SEQ. */
2744 static unsigned
2745 seq_cost (rtx seq, bool speed)
2747 unsigned cost = 0;
2748 rtx set;
2750 for (; seq; seq = NEXT_INSN (seq))
2752 set = single_set (seq);
2753 if (set)
2754 cost += set_src_cost (SET_SRC (set), speed);
2755 else
2756 cost++;
2759 return cost;
2762 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory. */
2763 static rtx
2764 produce_memory_decl_rtl (tree obj, int *regno)
2766 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2767 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2768 rtx x;
2770 gcc_assert (obj);
2771 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2773 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2774 x = gen_rtx_SYMBOL_REF (address_mode, name);
2775 SET_SYMBOL_REF_DECL (x, obj);
2776 x = gen_rtx_MEM (DECL_MODE (obj), x);
2777 set_mem_addr_space (x, as);
2778 targetm.encode_section_info (obj, x, true);
2780 else
2782 x = gen_raw_REG (address_mode, (*regno)++);
2783 x = gen_rtx_MEM (DECL_MODE (obj), x);
2784 set_mem_addr_space (x, as);
2787 return x;
2790 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2791 walk_tree. DATA contains the current fake register number. */
2793 static tree
2794 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2796 tree obj = NULL_TREE;
2797 rtx x = NULL_RTX;
2798 int *regno = (int *) data;
2800 switch (TREE_CODE (*expr_p))
2802 case ADDR_EXPR:
2803 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2804 handled_component_p (*expr_p);
2805 expr_p = &TREE_OPERAND (*expr_p, 0))
2806 continue;
2807 obj = *expr_p;
2808 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2809 x = produce_memory_decl_rtl (obj, regno);
2810 break;
2812 case SSA_NAME:
2813 *ws = 0;
2814 obj = SSA_NAME_VAR (*expr_p);
2815 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2816 if (!obj)
2817 return NULL_TREE;
2818 if (!DECL_RTL_SET_P (obj))
2819 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2820 break;
2822 case VAR_DECL:
2823 case PARM_DECL:
2824 case RESULT_DECL:
2825 *ws = 0;
2826 obj = *expr_p;
2828 if (DECL_RTL_SET_P (obj))
2829 break;
2831 if (DECL_MODE (obj) == BLKmode)
2832 x = produce_memory_decl_rtl (obj, regno);
2833 else
2834 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2836 break;
2838 default:
2839 break;
2842 if (x)
2844 decl_rtl_to_reset.safe_push (obj);
2845 SET_DECL_RTL (obj, x);
2848 return NULL_TREE;
2851 /* Determines cost of the computation of EXPR. */
2853 static unsigned
2854 computation_cost (tree expr, bool speed)
2856 rtx seq, rslt;
2857 tree type = TREE_TYPE (expr);
2858 unsigned cost;
2859 /* Avoid using hard regs in ways which may be unsupported. */
2860 int regno = LAST_VIRTUAL_REGISTER + 1;
2861 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2862 enum node_frequency real_frequency = node->frequency;
2864 node->frequency = NODE_FREQUENCY_NORMAL;
2865 crtl->maybe_hot_insn_p = speed;
2866 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2867 start_sequence ();
2868 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2869 seq = get_insns ();
2870 end_sequence ();
2871 default_rtl_profile ();
2872 node->frequency = real_frequency;
2874 cost = seq_cost (seq, speed);
2875 if (MEM_P (rslt))
2876 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2877 TYPE_ADDR_SPACE (type), speed);
2878 else if (!REG_P (rslt))
2879 cost += set_src_cost (rslt, speed);
2881 return cost;
2884 /* Returns the variable containing the value of candidate CAND at statement AT. */
2886 static tree
2887 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2889 if (stmt_after_increment (loop, cand, stmt))
2890 return cand->var_after;
2891 else
2892 return cand->var_before;
2895 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2896 same precision, which is at least as wide as the precision of TYPE, stores
2897 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2898 type of A and B. */
2900 static tree
2901 determine_common_wider_type (tree *a, tree *b)
2903 tree wider_type = NULL;
2904 tree suba, subb;
2905 tree atype = TREE_TYPE (*a);
2907 if (CONVERT_EXPR_P (*a))
2909 suba = TREE_OPERAND (*a, 0);
2910 wider_type = TREE_TYPE (suba);
2911 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2912 return atype;
2914 else
2915 return atype;
2917 if (CONVERT_EXPR_P (*b))
2919 subb = TREE_OPERAND (*b, 0);
2920 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2921 return atype;
2923 else
2924 return atype;
2926 *a = suba;
2927 *b = subb;
2928 return wider_type;
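/* For illustration, assuming 64-bit long and 32-bit unsigned: if
   A == (unsigned) al and B == (unsigned) bl for longs al and bl, the
   function replaces *a and *b with al and bl and returns long.  The caller
   can then fold al - bl in the wider type and truncate once at the end;
   since the result is reduced modulo 2^32 anyway, this cannot change it.  */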
2931 /* Determines the expression by which USE is expressed from induction variable
2932 CAND at statement AT in LOOP. The expression is stored in a decomposed
2933 form into AFF. Returns false if USE cannot be expressed using CAND. */
2935 static bool
2936 get_computation_aff (struct loop *loop,
2937 struct iv_use *use, struct iv_cand *cand, gimple at,
2938 struct affine_tree_combination *aff)
2940 tree ubase = use->iv->base;
2941 tree ustep = use->iv->step;
2942 tree cbase = cand->iv->base;
2943 tree cstep = cand->iv->step, cstep_common;
2944 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2945 tree common_type, var;
2946 tree uutype;
2947 aff_tree cbase_aff, var_aff;
2948 double_int rat;
2950 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2952 /* We do not have a precision to express the values of use. */
2953 return false;
2956 var = var_at_stmt (loop, cand, at);
2957 uutype = unsigned_type_for (utype);
2959 /* If the conversion is not noop, perform it. */
2960 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2962 cstep = fold_convert (uutype, cstep);
2963 cbase = fold_convert (uutype, cbase);
2964 var = fold_convert (uutype, var);
2967 if (!constant_multiple_of (ustep, cstep, &rat))
2968 return false;
2970 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2971 type, we achieve better folding by computing their difference in this
2972 wider type, and cast the result to UUTYPE. We do not need to worry about
2973 overflows, as all the arithmetics will in the end be performed in UUTYPE
2974 anyway. */
2975 common_type = determine_common_wider_type (&ubase, &cbase);
2977 /* use = ubase - ratio * cbase + ratio * var. */
2978 tree_to_aff_combination (ubase, common_type, aff);
2979 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2980 tree_to_aff_combination (var, uutype, &var_aff);
2982 /* We need to shift the value if we are after the increment. */
2983 if (stmt_after_increment (loop, cand, at))
2985 aff_tree cstep_aff;
2987 if (common_type != uutype)
2988 cstep_common = fold_convert (common_type, cstep);
2989 else
2990 cstep_common = cstep;
2992 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2993 aff_combination_add (&cbase_aff, &cstep_aff);
2996 aff_combination_scale (&cbase_aff, -rat);
2997 aff_combination_add (aff, &cbase_aff);
2998 if (common_type != uutype)
2999 aff_combination_convert (aff, uutype);
3001 aff_combination_scale (&var_aff, rat);
3002 aff_combination_add (aff, &var_aff);
3004 return true;
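/* For illustration: take a use iv {base 0, step 4} (say, a byte offset) and
   a candidate iv {base 0, step 1} (a loop counter).  constant_multiple_of
   gives rat == 4, and the combination built above is

     use = ubase - 4 * cbase + 4 * var == 4 * var

   i.e. the use is recovered by scaling the candidate, the bases
   cancelling out.  */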
3007 /* Return the type of USE. */
3009 static tree
3010 get_use_type (struct iv_use *use)
3012 tree base_type = TREE_TYPE (use->iv->base);
3013 tree type;
3015 if (use->type == USE_ADDRESS)
3017 /* The base_type may be a void pointer. Create a pointer type based on
3018 the mem_ref instead. */
3019 type = build_pointer_type (TREE_TYPE (*use->op_p));
3020 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3021 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3023 else
3024 type = base_type;
3026 return type;
3029 /* Determines the expression by which USE is expressed from induction variable
3030 CAND at statement AT in LOOP. The computation is unshared. */
3032 static tree
3033 get_computation_at (struct loop *loop,
3034 struct iv_use *use, struct iv_cand *cand, gimple at)
3036 aff_tree aff;
3037 tree type = get_use_type (use);
3039 if (!get_computation_aff (loop, use, cand, at, &aff))
3040 return NULL_TREE;
3041 unshare_aff_combination (&aff);
3042 return fold_convert (type, aff_combination_to_tree (&aff));
3045 /* Determines the expression by which USE is expressed from induction variable
3046 CAND in LOOP. The computation is unshared. */
3048 static tree
3049 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3051 return get_computation_at (loop, use, cand, use->stmt);
3054 /* Adjust the cost COST for being in loop setup rather than loop body.
3055 If we're optimizing for space, the loop setup overhead is constant;
3056 if we're optimizing for speed, amortize it over the per-iteration cost. */
3057 static unsigned
3058 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3060 if (cost == INFTY)
3061 return cost;
3062 else if (optimize_loop_for_speed_p (data->current_loop))
3063 return cost / avg_loop_niter (data->current_loop);
3064 else
3065 return cost;
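/* For illustration: with a setup cost of 12 and avg_loop_niter == 4,
   optimizing for speed charges 12 / 4 == 3 units per iteration, while
   optimizing for size keeps the full 12, since the setup code is emitted
   exactly once whatever the trip count.  */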
3068 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3069 validity for a memory reference accessing memory of mode MODE in
3070 address space AS. */
3073 bool
3074 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3075 addr_space_t as)
3077 #define MAX_RATIO 128
3078 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3079 static vec<sbitmap> valid_mult_list;
3080 sbitmap valid_mult;
3082 if (data_index >= valid_mult_list.length ())
3083 valid_mult_list.safe_grow_cleared (data_index + 1);
3085 valid_mult = valid_mult_list[data_index];
3086 if (!valid_mult)
3088 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3089 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3090 rtx addr;
3091 HOST_WIDE_INT i;
3093 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3094 bitmap_clear (valid_mult);
3095 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3096 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3098 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3099 if (memory_address_addr_space_p (mode, addr, as))
3100 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3103 if (dump_file && (dump_flags & TDF_DETAILS))
3105 fprintf (dump_file, " allowed multipliers:");
3106 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3107 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3108 fprintf (dump_file, " %d", (int) i);
3109 fprintf (dump_file, "\n");
3110 fprintf (dump_file, "\n");
3113 valid_mult_list[data_index] = valid_mult;
3116 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3117 return false;
3119 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
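/* For illustration: on x86, scaled-index addressing accepts ratios 1, 2, 4
   and 8 (e.g. base + 4 * index), so those bits get set in VALID_MULT,
   whereas a ratio of 3 fails memory_address_addr_space_p and is rejected
   here.  */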
3122 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3123 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3124 variable is omitted. Compute the cost for a memory reference that accesses
3125 a memory location of mode MEM_MODE in address space AS.
3127 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3128 size of MEM_MODE / RATIO) is available. To make this determination, we
3129 look at the size of the increment to be made, which is given in CSTEP.
3130 CSTEP may be zero if the step is unknown.
3131 STMT_AFTER_INC is true iff the statement we're looking at is after the
3132 increment of the original biv.
3134 TODO -- there must be some better way. This all is quite crude. */
3136 typedef struct address_cost_data_s
3138 HOST_WIDE_INT min_offset, max_offset;
3139 unsigned costs[2][2][2][2];
3140 } *address_cost_data;
3143 static comp_cost
3144 get_address_cost (bool symbol_present, bool var_present,
3145 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3146 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3147 addr_space_t as, bool speed,
3148 bool stmt_after_inc, bool *may_autoinc)
3150 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3151 static vec<address_cost_data> address_cost_data_list;
3152 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3153 address_cost_data data;
3154 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3155 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3156 unsigned cost, acost, complexity;
3157 bool offset_p, ratio_p, autoinc;
3158 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3159 unsigned HOST_WIDE_INT mask;
3160 unsigned bits;
3162 if (data_index >= address_cost_data_list.length ())
3163 address_cost_data_list.safe_grow_cleared (data_index + 1);
3165 data = address_cost_data_list[data_index];
3166 if (!data)
3168 HOST_WIDE_INT i;
3169 HOST_WIDE_INT rat, off = 0;
3170 int old_cse_not_expected, width;
3171 unsigned sym_p, var_p, off_p, rat_p, add_c;
3172 rtx seq, addr, base;
3173 rtx reg0, reg1;
3175 data = (address_cost_data) xcalloc (1, sizeof (*data));
3177 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3179 width = GET_MODE_BITSIZE (address_mode) - 1;
3180 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3181 width = HOST_BITS_PER_WIDE_INT - 1;
3182 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3184 for (i = width; i >= 0; i--)
3186 off = -((unsigned HOST_WIDE_INT) 1 << i);
3187 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3188 if (memory_address_addr_space_p (mem_mode, addr, as))
3189 break;
3191 data->min_offset = (i == -1 ? 0 : off);
3193 for (i = width; i >= 0; i--)
3195 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3196 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3197 if (memory_address_addr_space_p (mem_mode, addr, as))
3198 break;
3200 if (i == -1)
3201 off = 0;
3202 data->max_offset = off;
3204 if (dump_file && (dump_flags & TDF_DETAILS))
3206 fprintf (dump_file, "get_address_cost:\n");
3207 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3208 GET_MODE_NAME (mem_mode),
3209 data->min_offset);
3210 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3211 GET_MODE_NAME (mem_mode),
3212 data->max_offset);
3215 rat = 1;
3216 for (i = 2; i <= MAX_RATIO; i++)
3217 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3219 rat = i;
3220 break;
3223 /* Compute the cost of various addressing modes. */
3224 acost = 0;
3225 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3226 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3228 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3229 || USE_STORE_PRE_DECREMENT (mem_mode))
3231 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3232 has_predec[mem_mode]
3233 = memory_address_addr_space_p (mem_mode, addr, as);
3235 if (USE_LOAD_POST_DECREMENT (mem_mode)
3236 || USE_STORE_POST_DECREMENT (mem_mode))
3238 addr = gen_rtx_POST_DEC (address_mode, reg0);
3239 has_postdec[mem_mode]
3240 = memory_address_addr_space_p (mem_mode, addr, as);
3242 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3243 || USE_STORE_PRE_INCREMENT (mem_mode))
3245 addr = gen_rtx_PRE_INC (address_mode, reg0);
3246 has_preinc[mem_mode]
3247 = memory_address_addr_space_p (mem_mode, addr, as);
3249 if (USE_LOAD_POST_INCREMENT (mem_mode)
3250 || USE_STORE_POST_INCREMENT (mem_mode))
3252 addr = gen_rtx_POST_INC (address_mode, reg0);
3253 has_postinc[mem_mode]
3254 = memory_address_addr_space_p (mem_mode, addr, as);
3256 for (i = 0; i < 16; i++)
3258 sym_p = i & 1;
3259 var_p = (i >> 1) & 1;
3260 off_p = (i >> 2) & 1;
3261 rat_p = (i >> 3) & 1;
3263 addr = reg0;
3264 if (rat_p)
3265 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3266 gen_int_mode (rat, address_mode));
3268 if (var_p)
3269 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3271 if (sym_p)
3273 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3274 /* ??? We can run into trouble with some backends by presenting
3275 it with symbols which haven't been properly passed through
3276 targetm.encode_section_info. By setting the local bit, we
3277 enhance the probability of things working. */
3278 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3280 if (off_p)
3281 base = gen_rtx_fmt_e (CONST, address_mode,
3282 gen_rtx_fmt_ee
3283 (PLUS, address_mode, base,
3284 gen_int_mode (off, address_mode)));
3286 else if (off_p)
3287 base = gen_int_mode (off, address_mode);
3288 else
3289 base = NULL_RTX;
3291 if (base)
3292 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3294 start_sequence ();
3295 /* To avoid splitting addressing modes, pretend that no cse will
3296 follow. */
3297 old_cse_not_expected = cse_not_expected;
3298 cse_not_expected = true;
3299 addr = memory_address_addr_space (mem_mode, addr, as);
3300 cse_not_expected = old_cse_not_expected;
3301 seq = get_insns ();
3302 end_sequence ();
3304 acost = seq_cost (seq, speed);
3305 acost += address_cost (addr, mem_mode, as, speed);
3307 if (!acost)
3308 acost = 1;
3309 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3312 /* On some targets, it is quite expensive to load symbol to a register,
3313 which makes addresses that contain symbols look much more expensive.
3314 However, the symbol will have to be loaded in any case before the
3315 loop (and quite likely we have it in register already), so it does not
3316 make much sense to penalize them too heavily. So make some final
3317 tweaks for the SYMBOL_PRESENT modes:
3319 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3320 var is cheaper, use this mode with small penalty.
3321 If VAR_PRESENT is true, try whether the mode with
3322 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
3323 if this is the case, use it. */
3324 add_c = add_cost (speed, address_mode);
3325 for (i = 0; i < 8; i++)
3327 var_p = i & 1;
3328 off_p = (i >> 1) & 1;
3329 rat_p = (i >> 2) & 1;
3331 acost = data->costs[0][1][off_p][rat_p] + 1;
3332 if (var_p)
3333 acost += add_c;
3335 if (acost < data->costs[1][var_p][off_p][rat_p])
3336 data->costs[1][var_p][off_p][rat_p] = acost;
3339 if (dump_file && (dump_flags & TDF_DETAILS))
3341 fprintf (dump_file, "Address costs:\n");
3343 for (i = 0; i < 16; i++)
3345 sym_p = i & 1;
3346 var_p = (i >> 1) & 1;
3347 off_p = (i >> 2) & 1;
3348 rat_p = (i >> 3) & 1;
3350 fprintf (dump_file, " ");
3351 if (sym_p)
3352 fprintf (dump_file, "sym + ");
3353 if (var_p)
3354 fprintf (dump_file, "var + ");
3355 if (off_p)
3356 fprintf (dump_file, "cst + ");
3357 if (rat_p)
3358 fprintf (dump_file, "rat * ");
3360 acost = data->costs[sym_p][var_p][off_p][rat_p];
3361 fprintf (dump_file, "index costs %d\n", acost);
3363 if (has_predec[mem_mode] || has_postdec[mem_mode]
3364 || has_preinc[mem_mode] || has_postinc[mem_mode])
3365 fprintf (dump_file, " May include autoinc/dec\n");
3366 fprintf (dump_file, "\n");
3369 address_cost_data_list[data_index] = data;
3372 bits = GET_MODE_BITSIZE (address_mode);
3373 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3374 offset &= mask;
3375 if ((offset >> (bits - 1) & 1))
3376 offset |= ~mask;
3377 s_offset = offset;
3379 autoinc = false;
3380 msize = GET_MODE_SIZE (mem_mode);
3381 autoinc_offset = offset;
3382 if (stmt_after_inc)
3383 autoinc_offset += ratio * cstep;
3384 if (symbol_present || var_present || ratio != 1)
3385 autoinc = false;
3386 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3387 && msize == cstep)
3388 || (has_postdec[mem_mode] && autoinc_offset == 0
3389 && msize == -cstep)
3390 || (has_preinc[mem_mode] && autoinc_offset == msize
3391 && msize == cstep)
3392 || (has_predec[mem_mode] && autoinc_offset == -msize
3393 && msize == -cstep))
3394 autoinc = true;
3396 cost = 0;
3397 offset_p = (s_offset != 0
3398 && data->min_offset <= s_offset
3399 && s_offset <= data->max_offset);
3400 ratio_p = (ratio != 1
3401 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3403 if (ratio != 1 && !ratio_p)
3404 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3406 if (s_offset && !offset_p && !symbol_present)
3407 cost += add_cost (speed, address_mode);
3409 if (may_autoinc)
3410 *may_autoinc = autoinc;
3411 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3412 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3413 return new_cost (cost + acost, complexity);
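/* For illustration of the offset canonicalization above, assume a 32-bit
   address mode on a host with a 64-bit HOST_WIDE_INT: mask == 0xffffffff,
   so an incoming offset of 0xfffffffc is first truncated to 32 bits and,
   because bit 31 is set, sign-extended back to -4 before being tested
   against [min_offset, max_offset].  */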
3416 /* Calculates the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3417 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3418 calculating the operands of EXPR. Returns true if successful, and returns
3419 the cost in COST. */
3421 static bool
3422 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3423 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3425 comp_cost res;
3426 tree op1 = TREE_OPERAND (expr, 1);
3427 tree cst = TREE_OPERAND (mult, 1);
3428 tree multop = TREE_OPERAND (mult, 0);
3429 int m = exact_log2 (int_cst_value (cst));
3430 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3431 int sa_cost;
3433 if (!(m >= 0 && m < maxm))
3434 return false;
3436 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3437 ? shiftadd_cost (speed, mode, m)
3438 : (mult == op1
3439 ? shiftsub1_cost (speed, mode, m)
3440 : shiftsub0_cost (speed, mode, m)));
3441 res = new_cost (sa_cost, 0);
3442 res = add_costs (res, mult == op1 ? cost0 : cost1);
3444 STRIP_NOPS (multop);
3445 if (!is_gimple_val (multop))
3446 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3448 *cost = res;
3449 return true;
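/* For illustration: x + y * 8 can be emitted as a single shift-add,
   x + (y << 3); here m == exact_log2 (8) == 3, and shiftadd_cost is
   charged instead of a full multiply plus an add.  For subtraction the
   two shiftsub costs distinguish x - (y << 3) (the shifted product is
   op1, shiftsub1) from (y << 3) - x (shiftsub0).  */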
3452 /* Estimates cost of forcing expression EXPR into a variable. */
3454 static comp_cost
3455 force_expr_to_var_cost (tree expr, bool speed)
3457 static bool costs_initialized = false;
3458 static unsigned integer_cost [2];
3459 static unsigned symbol_cost [2];
3460 static unsigned address_cost [2];
3461 tree op0, op1;
3462 comp_cost cost0, cost1, cost;
3463 enum machine_mode mode;
3465 if (!costs_initialized)
3467 tree type = build_pointer_type (integer_type_node);
3468 tree var, addr;
3469 rtx x;
3470 int i;
3472 var = create_tmp_var_raw (integer_type_node, "test_var");
3473 TREE_STATIC (var) = 1;
3474 x = produce_memory_decl_rtl (var, NULL);
3475 SET_DECL_RTL (var, x);
3477 addr = build1 (ADDR_EXPR, type, var);
3480 for (i = 0; i < 2; i++)
3482 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3483 2000), i);
3485 symbol_cost[i] = computation_cost (addr, i) + 1;
3487 address_cost[i]
3488 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3489 if (dump_file && (dump_flags & TDF_DETAILS))
3491 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3492 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3493 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3494 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3495 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3496 fprintf (dump_file, "\n");
3500 costs_initialized = true;
3503 STRIP_NOPS (expr);
3505 if (SSA_VAR_P (expr))
3506 return no_cost;
3508 if (is_gimple_min_invariant (expr))
3510 if (TREE_CODE (expr) == INTEGER_CST)
3511 return new_cost (integer_cost [speed], 0);
3513 if (TREE_CODE (expr) == ADDR_EXPR)
3515 tree obj = TREE_OPERAND (expr, 0);
3517 if (TREE_CODE (obj) == VAR_DECL
3518 || TREE_CODE (obj) == PARM_DECL
3519 || TREE_CODE (obj) == RESULT_DECL)
3520 return new_cost (symbol_cost [speed], 0);
3523 return new_cost (address_cost [speed], 0);
3526 switch (TREE_CODE (expr))
3528 case POINTER_PLUS_EXPR:
3529 case PLUS_EXPR:
3530 case MINUS_EXPR:
3531 case MULT_EXPR:
3532 op0 = TREE_OPERAND (expr, 0);
3533 op1 = TREE_OPERAND (expr, 1);
3534 STRIP_NOPS (op0);
3535 STRIP_NOPS (op1);
3537 if (is_gimple_val (op0))
3538 cost0 = no_cost;
3539 else
3540 cost0 = force_expr_to_var_cost (op0, speed);
3542 if (is_gimple_val (op1))
3543 cost1 = no_cost;
3544 else
3545 cost1 = force_expr_to_var_cost (op1, speed);
3547 break;
3549 case NEGATE_EXPR:
3550 op0 = TREE_OPERAND (expr, 0);
3551 STRIP_NOPS (op0);
3552 op1 = NULL_TREE;
3554 if (is_gimple_val (op0))
3555 cost0 = no_cost;
3556 else
3557 cost0 = force_expr_to_var_cost (op0, speed);
3559 cost1 = no_cost;
3560 break;
3562 default:
3563 /* Just an arbitrary value, FIXME. */
3564 return new_cost (target_spill_cost[speed], 0);
3567 mode = TYPE_MODE (TREE_TYPE (expr));
3568 switch (TREE_CODE (expr))
3570 case POINTER_PLUS_EXPR:
3571 case PLUS_EXPR:
3572 case MINUS_EXPR:
3573 case NEGATE_EXPR:
3574 cost = new_cost (add_cost (speed, mode), 0);
3575 if (TREE_CODE (expr) != NEGATE_EXPR)
3577 tree mult = NULL_TREE;
3578 comp_cost sa_cost;
3579 if (TREE_CODE (op1) == MULT_EXPR)
3580 mult = op1;
3581 else if (TREE_CODE (op0) == MULT_EXPR)
3582 mult = op0;
3584 if (mult != NULL_TREE
3585 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3586 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3587 speed, &sa_cost))
3588 return sa_cost;
3590 break;
3592 case MULT_EXPR:
3593 if (cst_and_fits_in_hwi (op0))
3594 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3595 mode, speed), 0);
3596 else if (cst_and_fits_in_hwi (op1))
3597 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3598 mode, speed), 0);
3599 else
3600 return new_cost (target_spill_cost [speed], 0);
3601 break;
3603 default:
3604 gcc_unreachable ();
3607 cost = add_costs (cost, cost0);
3608 cost = add_costs (cost, cost1);
3610 /* Bound the cost by target_spill_cost. The parts of complicated
3611 computations often are either loop invariant or at least can
3612 be shared between several iv uses, so letting this grow without
3613 limits would not give reasonable results. */
3614 if (cost.cost > (int) target_spill_cost [speed])
3615 cost.cost = target_spill_cost [speed];
3617 return cost;
3620 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3621 invariants the computation depends on. */
3623 static comp_cost
3624 force_var_cost (struct ivopts_data *data,
3625 tree expr, bitmap *depends_on)
3627 if (depends_on)
3629 fd_ivopts_data = data;
3630 walk_tree (&expr, find_depends, depends_on, NULL);
3633 return force_expr_to_var_cost (expr, data->speed);
3636 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3637 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3638 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3639 invariants the computation depends on. */
3641 static comp_cost
3642 split_address_cost (struct ivopts_data *data,
3643 tree addr, bool *symbol_present, bool *var_present,
3644 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3646 tree core;
3647 HOST_WIDE_INT bitsize;
3648 HOST_WIDE_INT bitpos;
3649 tree toffset;
3650 enum machine_mode mode;
3651 int unsignedp, volatilep;
3653 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3654 &unsignedp, &volatilep, false);
3656 if (toffset != 0
3657 || bitpos % BITS_PER_UNIT != 0
3658 || TREE_CODE (core) != VAR_DECL)
3660 *symbol_present = false;
3661 *var_present = true;
3662 fd_ivopts_data = data;
3663 walk_tree (&addr, find_depends, depends_on, NULL);
3664 return new_cost (target_spill_cost[data->speed], 0);
3667 *offset += bitpos / BITS_PER_UNIT;
3668 if (TREE_STATIC (core)
3669 || DECL_EXTERNAL (core))
3671 *symbol_present = true;
3672 *var_present = false;
3673 return no_cost;
3676 *symbol_present = false;
3677 *var_present = true;
3678 return no_cost;
3681 /* Estimates cost of expressing difference of addresses E1 - E2 as
3682 var + symbol + offset. The value of offset is added to OFFSET,
3683 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3684 part is missing. DEPENDS_ON is a set of the invariants the computation
3685 depends on. */
3687 static comp_cost
3688 ptr_difference_cost (struct ivopts_data *data,
3689 tree e1, tree e2, bool *symbol_present, bool *var_present,
3690 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3692 HOST_WIDE_INT diff = 0;
3693 aff_tree aff_e1, aff_e2;
3694 tree type;
3696 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3698 if (ptr_difference_const (e1, e2, &diff))
3700 *offset += diff;
3701 *symbol_present = false;
3702 *var_present = false;
3703 return no_cost;
3706 if (integer_zerop (e2))
3707 return split_address_cost (data, TREE_OPERAND (e1, 0),
3708 symbol_present, var_present, offset, depends_on);
3710 *symbol_present = false;
3711 *var_present = true;
3713 type = signed_type_for (TREE_TYPE (e1));
3714 tree_to_aff_combination (e1, type, &aff_e1);
3715 tree_to_aff_combination (e2, type, &aff_e2);
3716 aff_combination_scale (&aff_e2, double_int_minus_one);
3717 aff_combination_add (&aff_e1, &aff_e2);
3719 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3722 /* Estimates cost of expressing difference E1 - E2 as
3723 var + symbol + offset. The value of offset is added to OFFSET,
3724 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3725 part is missing. DEPENDS_ON is a set of the invariants the computation
3726 depends on. */
3728 static comp_cost
3729 difference_cost (struct ivopts_data *data,
3730 tree e1, tree e2, bool *symbol_present, bool *var_present,
3731 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3733 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3734 unsigned HOST_WIDE_INT off1, off2;
3735 aff_tree aff_e1, aff_e2;
3736 tree type;
3738 e1 = strip_offset (e1, &off1);
3739 e2 = strip_offset (e2, &off2);
3740 *offset += off1 - off2;
3742 STRIP_NOPS (e1);
3743 STRIP_NOPS (e2);
3745 if (TREE_CODE (e1) == ADDR_EXPR)
3746 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3747 offset, depends_on);
3748 *symbol_present = false;
3750 if (operand_equal_p (e1, e2, 0))
3752 *var_present = false;
3753 return no_cost;
3756 *var_present = true;
3758 if (integer_zerop (e2))
3759 return force_var_cost (data, e1, depends_on);
3761 if (integer_zerop (e1))
3763 comp_cost cost = force_var_cost (data, e2, depends_on);
3764 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3765 return cost;
3768 type = signed_type_for (TREE_TYPE (e1));
3769 tree_to_aff_combination (e1, type, &aff_e1);
3770 tree_to_aff_combination (e2, type, &aff_e2);
3771 aff_combination_scale (&aff_e2, double_int_minus_one);
3772 aff_combination_add (&aff_e1, &aff_e2);
3774 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3777 /* Returns true if AFF1 and AFF2 are identical. */
3779 static bool
3780 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3782 unsigned i;
3784 if (aff1->n != aff2->n)
3785 return false;
3787 for (i = 0; i < aff1->n; i++)
3789 if (aff1->elts[i].coef != aff2->elts[i].coef)
3790 return false;
3792 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3793 return false;
3795 return true;
3798 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3800 static int
3801 get_expr_id (struct ivopts_data *data, tree expr)
3803 struct iv_inv_expr_ent ent;
3804 struct iv_inv_expr_ent **slot;
3806 ent.expr = expr;
3807 ent.hash = iterative_hash_expr (expr, 0);
3808 slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3809 &ent, INSERT);
3810 if (*slot)
3811 return (*slot)->id;
3813 *slot = XNEW (struct iv_inv_expr_ent);
3814 (*slot)->expr = expr;
3815 (*slot)->hash = ent.hash;
3816 (*slot)->id = data->inv_expr_id++;
3817 return (*slot)->id;
3820 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3821 requires a new compiler-generated temporary. Returns -1 otherwise.
3822 ADDRESS_P is a flag indicating if the expression is for address
3823 computation. */
3825 static int
3826 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3827 tree cbase, HOST_WIDE_INT ratio,
3828 bool address_p)
3830 aff_tree ubase_aff, cbase_aff;
3831 tree expr, ub, cb;
3833 STRIP_NOPS (ubase);
3834 STRIP_NOPS (cbase);
3835 ub = ubase;
3836 cb = cbase;
3838 if ((TREE_CODE (ubase) == INTEGER_CST)
3839 && (TREE_CODE (cbase) == INTEGER_CST))
3840 return -1;
3842 /* Strips the constant part of UBASE. */
3843 if (TREE_CODE (ubase) == PLUS_EXPR
3844 || TREE_CODE (ubase) == MINUS_EXPR
3845 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3847 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3848 ubase = TREE_OPERAND (ubase, 0);
3851 /* Strips the constant part of CBASE. */
3852 if (TREE_CODE (cbase) == PLUS_EXPR
3853 || TREE_CODE (cbase) == MINUS_EXPR
3854 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3856 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3857 cbase = TREE_OPERAND (cbase, 0);
3860 if (address_p)
3862 if (((TREE_CODE (ubase) == SSA_NAME)
3863 || (TREE_CODE (ubase) == ADDR_EXPR
3864 && is_gimple_min_invariant (ubase)))
3865 && (TREE_CODE (cbase) == INTEGER_CST))
3866 return -1;
3868 if (((TREE_CODE (cbase) == SSA_NAME)
3869 || (TREE_CODE (cbase) == ADDR_EXPR
3870 && is_gimple_min_invariant (cbase)))
3871 && (TREE_CODE (ubase) == INTEGER_CST))
3872 return -1;
3875 if (ratio == 1)
3877 if (operand_equal_p (ubase, cbase, 0))
3878 return -1;
3880 if (TREE_CODE (ubase) == ADDR_EXPR
3881 && TREE_CODE (cbase) == ADDR_EXPR)
3883 tree usym, csym;
3885 usym = TREE_OPERAND (ubase, 0);
3886 csym = TREE_OPERAND (cbase, 0);
3887 if (TREE_CODE (usym) == ARRAY_REF)
3889 tree ind = TREE_OPERAND (usym, 1);
3890 if (TREE_CODE (ind) == INTEGER_CST
3891 && host_integerp (ind, 0)
3892 && TREE_INT_CST_LOW (ind) == 0)
3893 usym = TREE_OPERAND (usym, 0);
3895 if (TREE_CODE (csym) == ARRAY_REF)
3897 tree ind = TREE_OPERAND (csym, 1);
3898 if (TREE_CODE (ind) == INTEGER_CST
3899 && host_integerp (ind, 0)
3900 && TREE_INT_CST_LOW (ind) == 0)
3901 csym = TREE_OPERAND (csym, 0);
3903 if (operand_equal_p (usym, csym, 0))
3904 return -1;
3906 /* Now do a more complex comparison. */
3907 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3908 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3909 if (compare_aff_trees (&ubase_aff, &cbase_aff))
3910 return -1;
3913 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3914 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3916 aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
3917 aff_combination_add (&ubase_aff, &cbase_aff);
3918 expr = aff_combination_to_tree (&ubase_aff);
3919 return get_expr_id (data, expr);
3924 /* Determines the cost of the computation by which USE is expressed
3925 from induction variable CAND. If ADDRESS_P is true, we just need
3926 to create an address from it, otherwise we want to get it into a
3927 register. A set of invariants we depend on is stored in
3928 DEPENDS_ON. AT is the statement at which the value is computed.
3929 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3930 addressing is likely. */
3932 static comp_cost
3933 get_computation_cost_at (struct ivopts_data *data,
3934 struct iv_use *use, struct iv_cand *cand,
3935 bool address_p, bitmap *depends_on, gimple at,
3936 bool *can_autoinc,
3937 int *inv_expr_id)
3939 tree ubase = use->iv->base, ustep = use->iv->step;
3940 tree cbase, cstep;
3941 tree utype = TREE_TYPE (ubase), ctype;
3942 unsigned HOST_WIDE_INT cstepi, offset = 0;
3943 HOST_WIDE_INT ratio, aratio;
3944 bool var_present, symbol_present, stmt_is_after_inc;
3945 comp_cost cost;
3946 double_int rat;
3947 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3948 enum machine_mode mem_mode = (address_p
3949 ? TYPE_MODE (TREE_TYPE (*use->op_p))
3950 : VOIDmode);
3952 *depends_on = NULL;
3954 /* Only consider real candidates. */
3955 if (!cand->iv)
3956 return infinite_cost;
3958 cbase = cand->iv->base;
3959 cstep = cand->iv->step;
3960 ctype = TREE_TYPE (cbase);
3962 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3964 /* We do not have a precision to express the values of use. */
3965 return infinite_cost;
3968 if (address_p
3969 || (use->iv->base_object
3970 && cand->iv->base_object
3971 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
3972 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
3974 /* Do not try to express address of an object with computation based
3975 on address of a different object. This may cause problems in rtl
3976 level alias analysis (that does not expect this to be happening,
3977 as this is illegal in C), and would be unlikely to be useful
3978 anyway. */
3979 if (use->iv->base_object
3980 && cand->iv->base_object
3981 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3982 return infinite_cost;
3985 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3987 /* TODO -- add direct handling of this case. */
3988 goto fallback;
3991 /* CSTEPI is removed from the offset in case the statement is after the
3992 increment. If the step is not constant, we use zero instead.
3993 This is a bit imprecise (there is the extra addition), but
3994 redundancy elimination is likely to transform the code so that
3995 it uses the value of the variable before the increment anyway,
3996 so it is not all that unrealistic. */
3997 if (cst_and_fits_in_hwi (cstep))
3998 cstepi = int_cst_value (cstep);
3999 else
4000 cstepi = 0;
4002 if (!constant_multiple_of (ustep, cstep, &rat))
4003 return infinite_cost;
4005 if (rat.fits_shwi ())
4006 ratio = rat.to_shwi ();
4007 else
4008 return infinite_cost;
4010 STRIP_NOPS (cbase);
4011 ctype = TREE_TYPE (cbase);
4013 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4015 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4016 or ratio == 1, it is better to handle this like
4018 ubase - ratio * cbase + ratio * var
4020 (this also holds in the case ratio == -1, TODO). */
4022 if (cst_and_fits_in_hwi (cbase))
4024 offset = - ratio * int_cst_value (cbase);
4025 cost = difference_cost (data,
4026 ubase, build_int_cst (utype, 0),
4027 &symbol_present, &var_present, &offset,
4028 depends_on);
4029 cost.cost /= avg_loop_niter (data->current_loop);
4031 else if (ratio == 1)
4033 tree real_cbase = cbase;
4035 /* Check to see if any adjustment is needed. */
4036 if (cstepi == 0 && stmt_is_after_inc)
4038 aff_tree real_cbase_aff;
4039 aff_tree cstep_aff;
4041 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4042 &real_cbase_aff);
4043 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4045 aff_combination_add (&real_cbase_aff, &cstep_aff);
4046 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4049 cost = difference_cost (data,
4050 ubase, real_cbase,
4051 &symbol_present, &var_present, &offset,
4052 depends_on);
4053 cost.cost /= avg_loop_niter (data->current_loop);
4055 else if (address_p
4056 && !POINTER_TYPE_P (ctype)
4057 && multiplier_allowed_in_address_p
4058 (ratio, mem_mode,
4059 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4061 cbase
4062 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4063 cost = difference_cost (data,
4064 ubase, cbase,
4065 &symbol_present, &var_present, &offset,
4066 depends_on);
4067 cost.cost /= avg_loop_niter (data->current_loop);
4069 else
4071 cost = force_var_cost (data, cbase, depends_on);
4072 cost = add_costs (cost,
4073 difference_cost (data,
4074 ubase, build_int_cst (utype, 0),
4075 &symbol_present, &var_present,
4076 &offset, depends_on));
4077 cost.cost /= avg_loop_niter (data->current_loop);
4078 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4081 if (inv_expr_id)
4083 *inv_expr_id =
4084 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4085 /* Clear depends_on. */
4086 if (*inv_expr_id != -1 && depends_on && *depends_on)
4087 bitmap_clear (*depends_on);
4090 /* If we are after the increment, the value of the candidate is higher by
4091 one iteration. */
4092 if (stmt_is_after_inc)
4093 offset -= ratio * cstepi;
4095 /* Now the computation is in the shape symbol + var1 + const + ratio * var2.
4096 (symbol/var1/const parts may be omitted). If we are looking for an
4097 address, find the cost of addressing this. */
4098 if (address_p)
4099 return add_costs (cost,
4100 get_address_cost (symbol_present, var_present,
4101 offset, ratio, cstepi,
4102 mem_mode,
4103 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4104 speed, stmt_is_after_inc,
4105 can_autoinc));
4107 /* Otherwise estimate the costs for computing the expression. */
4108 if (!symbol_present && !var_present && !offset)
4110 if (ratio != 1)
4111 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4112 return cost;
4115 /* Symbol + offset should be compile-time computable so consider that they
4116 are added once to the variable, if present. */
4117 if (var_present && (symbol_present || offset))
4118 cost.cost += adjust_setup_cost (data,
4119 add_cost (speed, TYPE_MODE (ctype)));
4121 /* Having offset does not affect runtime cost in case it is added to
4122 symbol, but it increases complexity. */
4123 if (offset)
4124 cost.complexity++;
4126 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4128 aratio = ratio > 0 ? ratio : -ratio;
4129 if (aratio != 1)
4130 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4131 return cost;
4133 fallback:
4134 if (can_autoinc)
4135 *can_autoinc = false;
4138 /* Just get the expression, expand it and measure the cost. */
4139 tree comp = get_computation_at (data->current_loop, use, cand, at);
4141 if (!comp)
4142 return infinite_cost;
4144 if (address_p)
4145 comp = build_simple_mem_ref (comp);
4147 return new_cost (computation_cost (comp, speed), 0);
4151 /* Determines the cost of the computation by which USE is expressed
4152 from induction variable CAND. If ADDRESS_P is true, we just need
4153 to create an address from it, otherwise we want to get it into a
4154 register. A set of invariants we depend on is stored in
4155 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4156 autoinc addressing is likely. */
4158 static comp_cost
4159 get_computation_cost (struct ivopts_data *data,
4160 struct iv_use *use, struct iv_cand *cand,
4161 bool address_p, bitmap *depends_on,
4162 bool *can_autoinc, int *inv_expr_id)
4164 return get_computation_cost_at (data,
4165 use, cand, address_p, depends_on, use->stmt,
4166 can_autoinc, inv_expr_id);
4169 /* Determines cost of basing replacement of USE on CAND in a generic
4170 expression. */
4172 static bool
4173 determine_use_iv_cost_generic (struct ivopts_data *data,
4174 struct iv_use *use, struct iv_cand *cand)
4176 bitmap depends_on;
4177 comp_cost cost;
4178 int inv_expr_id = -1;
4180 /* The simple case first -- if we need to express value of the preserved
4181 original biv, the cost is 0. This also prevents us from counting the
4182 cost of increment twice -- once at this use and once in the cost of
4183 the candidate. */
4184 if (cand->pos == IP_ORIGINAL
4185 && cand->incremented_at == use->stmt)
4187 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4188 ERROR_MARK, -1);
4189 return true;
4192 cost = get_computation_cost (data, use, cand, false, &depends_on,
4193 NULL, &inv_expr_id);
4195 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4196 inv_expr_id);
4198 return !infinite_cost_p (cost);
4201 /* Determines cost of basing replacement of USE on CAND in an address. */
4203 static bool
4204 determine_use_iv_cost_address (struct ivopts_data *data,
4205 struct iv_use *use, struct iv_cand *cand)
4207 bitmap depends_on;
4208 bool can_autoinc;
4209 int inv_expr_id = -1;
4210 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4211 &can_autoinc, &inv_expr_id);
4213 if (cand->ainc_use == use)
4215 if (can_autoinc)
4216 cost.cost -= cand->cost_step;
4217 /* If we generated the candidate solely for exploiting autoincrement
4218 opportunities, and it turns out it can't be used, set the cost to
4219 infinity to make sure we ignore it. */
4220 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4221 cost = infinite_cost;
4223 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4224 inv_expr_id);
4226 return !infinite_cost_p (cost);
4229 /* Computes value of candidate CAND at position AT in iteration NITER, and
4230 stores it to VAL. */
4232 static void
4233 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4234 aff_tree *val)
4236 aff_tree step, delta, nit;
4237 struct iv *iv = cand->iv;
4238 tree type = TREE_TYPE (iv->base);
4239 tree steptype = type;
4240 if (POINTER_TYPE_P (type))
4241 steptype = sizetype;
4243 tree_to_aff_combination (iv->step, steptype, &step);
4244 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4245 aff_combination_convert (&nit, steptype);
4246 aff_combination_mult (&nit, &step, &delta);
4247 if (stmt_after_increment (loop, cand, at))
4248 aff_combination_add (&delta, &step);
4250 tree_to_aff_combination (iv->base, type, val);
4251 aff_combination_add (val, &delta);
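/* Conceptual model of cand_value_at (an illustrative sketch on plain
   host integers, ignoring the affine-combination and pointer-type
   bookkeeping of the real code): the candidate's value is BASE +
   NITER * STEP, plus one extra STEP when AT is after the increment.  */
static long
example_cand_value_at (long base, long step, long niter, int after_inc)
{
  long delta = niter * step;   /* aff_combination_mult (&nit, &step, ...) */
  if (after_inc)
    delta += step;             /* the stmt_after_increment case */
  return base + delta;         /* base plus the accumulated delta */
}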
4254 /* Returns period of induction variable iv. */
4256 static tree
4257 iv_period (struct iv *iv)
4259 tree step = iv->step, period, type;
4260 tree pow2div;
4262 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4264 type = unsigned_type_for (TREE_TYPE (step));
4265 /* The period of the iv is lcm (step, type_range)/step - 1,
4266 i.e., N*type_range/step - 1. Since the type range is a power
4267 of two, N == step >> num_of_ending_zeros_binary (step),
4268 so the final result is
4270 (type_range >> num_of_ending_zeros_binary (step)) - 1
4273 pow2div = num_ending_zeros (step);
4275 period = build_low_bits_mask (type,
4276 (TYPE_PRECISION (type)
4277 - tree_low_cst (pow2div, 1)));
4279 return period;
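/* Worked instance of the formula above (an illustrative sketch assuming
   a 32-bit unsigned iv type, not part of the pass): for step == 12,
   which has two trailing zero bits, the iv revisits its starting value
   after 2^30 increments, so the period is 2^30 - 1 == 0x3fffffff.  */
static unsigned int
example_iv_period_u32 (unsigned int step)
{
  unsigned int tz = 0;          /* num_ending_zeros (step) */
  while (tz < 32 && (step & (1u << tz)) == 0)
    tz++;
  /* (type_range >> tz) - 1, i.e. a mask of the low (32 - tz) bits.  */
  return tz >= 32 ? 0 : 0xffffffffu >> tz;
}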
4282 /* Returns the comparison operator used when eliminating the iv USE. */
4284 static enum tree_code
4285 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4287 struct loop *loop = data->current_loop;
4288 basic_block ex_bb;
4289 edge exit;
4291 ex_bb = gimple_bb (use->stmt);
4292 exit = EDGE_SUCC (ex_bb, 0);
4293 if (flow_bb_inside_loop_p (loop, exit->dest))
4294 exit = EDGE_SUCC (ex_bb, 1);
4296 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
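/* For example, in "if (i < n) goto loop_body; else goto exit;" the exit
   edge is the false edge, so the rewritten test must stay true while
   the loop runs and we return NE_EXPR (iterate while iv != bound); when
   the exit edge is the true edge, EQ_EXPR is returned instead (exit
   once the iv reaches the bound).  */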
4299 static tree
4300 strip_wrap_conserving_type_conversions (tree exp)
4302 while (tree_ssa_useless_type_conversion (exp)
4303 && (nowrap_type_p (TREE_TYPE (exp))
4304 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4305 exp = TREE_OPERAND (exp, 0);
4306 return exp;
4309 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic, we
4310 check for an exact match. */
4312 static bool
4313 expr_equal_p (tree e, tree what)
4315 gimple stmt;
4316 enum tree_code code;
4318 e = strip_wrap_conserving_type_conversions (e);
4319 what = strip_wrap_conserving_type_conversions (what);
4321 code = TREE_CODE (what);
4322 if (TREE_TYPE (e) != TREE_TYPE (what))
4323 return false;
4325 if (operand_equal_p (e, what, 0))
4326 return true;
4328 if (TREE_CODE (e) != SSA_NAME)
4329 return false;
4331 stmt = SSA_NAME_DEF_STMT (e);
4332 if (gimple_code (stmt) != GIMPLE_ASSIGN
4333 || gimple_assign_rhs_code (stmt) != code)
4334 return false;
4336 switch (get_gimple_rhs_class (code))
4338 case GIMPLE_BINARY_RHS:
4339 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4340 return false;
4341 /* Fallthru. */
4343 case GIMPLE_UNARY_RHS:
4344 case GIMPLE_SINGLE_RHS:
4345 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4346 default:
4347 return false;
4351 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4352 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4353 calculation is performed in non-wrapping type.
4355 TODO: More generally, we could test for the situation that
4356 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4357 This would require knowing the sign of OFFSET.
4359 Also, we only look for the first addition in the computation of BASE.
4360 More complex analysis would be better, but introducing it just for
4361 this optimization seems like overkill. */
4363 static bool
4364 difference_cannot_overflow_p (tree base, tree offset)
4366 enum tree_code code;
4367 tree e1, e2;
4369 if (!nowrap_type_p (TREE_TYPE (base)))
4370 return false;
4372 base = expand_simple_operations (base);
4374 if (TREE_CODE (base) == SSA_NAME)
4376 gimple stmt = SSA_NAME_DEF_STMT (base);
4378 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4379 return false;
4381 code = gimple_assign_rhs_code (stmt);
4382 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4383 return false;
4385 e1 = gimple_assign_rhs1 (stmt);
4386 e2 = gimple_assign_rhs2 (stmt);
4388 else
4390 code = TREE_CODE (base);
4391 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4392 return false;
4393 e1 = TREE_OPERAND (base, 0);
4394 e2 = TREE_OPERAND (base, 1);
4397 /* TODO: deeper inspection may be necessary to prove the equality. */
4398 switch (code)
4400 case PLUS_EXPR:
4401 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4402 case POINTER_PLUS_EXPR:
4403 return expr_equal_p (e2, offset);
4405 default:
4406 return false;
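/* A concrete instance of the pattern recognized above (illustrative;
   assumes pointer arithmetic, which is non-wrapping):

     p_0 = base + a;      // BASE is defined as SOMETHING + OFFSET
     ... p_0 - a ...      // BASE - OFFSET

   The subtraction merely undoes the addition that produced p_0, so it
   yields base and cannot overflow.  */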
4410 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4411 comparison with CAND. NITER describes the number of iterations of
4412 the loops. If successful, the comparison in COMP_P is altered accordingly.
4414 We aim to handle the following situation:
4416 sometype *base, *p;
4417 int a, b, i;
4419 i = a;
4420 p = p_0 = base + a;
4422 do
4423 {
4424 bla (*p);
4425 p++;
4426 i++;
4427 }
4428 while (i < b);
4430 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4431 We aim to optimize this to
4433 p = p_0 = base + a;
4434 do
4435 {
4436 bla (*p);
4437 p++;
4438 }
4439 while (p < p_0 - a + b);
4441 This preserves correctness, since the pointer arithmetic does not
4442 overflow. More precisely:
4444 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4445 overflow in computing it or the values of p.
4446 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4447 overflow. To prove this, we use the fact that p_0 = base + a. */
4449 static bool
4450 iv_elimination_compare_lt (struct ivopts_data *data,
4451 struct iv_cand *cand, enum tree_code *comp_p,
4452 struct tree_niter_desc *niter)
4454 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4455 struct affine_tree_combination nit, tmpa, tmpb;
4456 enum tree_code comp;
4457 HOST_WIDE_INT step;
4459 /* We need to know that the candidate induction variable does not overflow.
4460 While more complex analysis may be used to prove this, for now just
4461 check that the variable appears in the original program and that it
4462 is computed in a type that guarantees no overflows. */
4463 cand_type = TREE_TYPE (cand->iv->base);
4464 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4465 return false;
4467 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4468 the calculation of the BOUND could overflow, making the comparison
4469 invalid. */
4470 if (!data->loop_single_exit_p)
4471 return false;
4473 /* We need to be able to decide whether candidate is increasing or decreasing
4474 in order to choose the right comparison operator. */
4475 if (!cst_and_fits_in_hwi (cand->iv->step))
4476 return false;
4477 step = int_cst_value (cand->iv->step);
4479 /* Check that the number of iterations matches the expected pattern:
4480 a + 1 > b ? 0 : b - a - 1. */
4481 mbz = niter->may_be_zero;
4482 if (TREE_CODE (mbz) == GT_EXPR)
4484 /* Handle a + 1 > b. */
4485 tree op0 = TREE_OPERAND (mbz, 0);
4486 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4488 a = TREE_OPERAND (op0, 0);
4489 b = TREE_OPERAND (mbz, 1);
4491 else
4492 return false;
4494 else if (TREE_CODE (mbz) == LT_EXPR)
4496 tree op1 = TREE_OPERAND (mbz, 1);
4498 /* Handle b < a + 1. */
4499 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4501 a = TREE_OPERAND (op1, 0);
4502 b = TREE_OPERAND (mbz, 0);
4504 else
4505 return false;
4507 else
4508 return false;
4510 /* Expected number of iterations is B - A - 1. Check that it matches
4511 the actual number, i.e., that B - A - NITER = 1. */
4512 tree_to_aff_combination (niter->niter, nit_type, &nit);
4513 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4514 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4515 aff_combination_scale (&nit, double_int_minus_one);
4516 aff_combination_scale (&tmpa, double_int_minus_one);
4517 aff_combination_add (&tmpb, &tmpa);
4518 aff_combination_add (&tmpb, &nit);
4519 if (tmpb.n != 0 || tmpb.offset != double_int_one)
4520 return false;
4522 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4523 overflow. */
4524 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4525 cand->iv->step,
4526 fold_convert (TREE_TYPE (cand->iv->step), a));
4527 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4528 return false;
4530 /* Determine the new comparison operator. */
4531 comp = step < 0 ? GT_EXPR : LT_EXPR;
4532 if (*comp_p == NE_EXPR)
4533 *comp_p = comp;
4534 else if (*comp_p == EQ_EXPR)
4535 *comp_p = invert_tree_comparison (comp, false);
4536 else
4537 gcc_unreachable ();
4539 return true;
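/* Worked instance of the checks above (illustrative numbers): for
   a == 2 and b == 7, MAY_BE_ZERO is 2 + 1 > 7 (false) and NITER is
   b - a - 1 == 4, so the affine test B - A - NITER == 1 holds
   (7 - 2 - 4 == 1).  For a pointer candidate p with step 1, the exit
   test becomes p < p_0 - 2 + 7, and difference_cannot_overflow_p must
   show that p_0 - 2 is safe, which follows from p_0 = base + 2.  */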
4542 /* Check whether it is possible to express the condition in USE by comparison
4543 of candidate CAND. If so, store the value it is compared with to BOUND, and
4544 comparison operator to COMP. */
4546 static bool
4547 may_eliminate_iv (struct ivopts_data *data,
4548 struct iv_use *use, struct iv_cand *cand, tree *bound,
4549 enum tree_code *comp)
4551 basic_block ex_bb;
4552 edge exit;
4553 tree period;
4554 struct loop *loop = data->current_loop;
4555 aff_tree bnd;
4556 struct tree_niter_desc *desc = NULL;
4558 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4559 return false;
4561 /* For now, this works only for exits that dominate the loop latch.
4562 TODO: extend to other conditions inside loop body. */
4563 ex_bb = gimple_bb (use->stmt);
4564 if (use->stmt != last_stmt (ex_bb)
4565 || gimple_code (use->stmt) != GIMPLE_COND
4566 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4567 return false;
4569 exit = EDGE_SUCC (ex_bb, 0);
4570 if (flow_bb_inside_loop_p (loop, exit->dest))
4571 exit = EDGE_SUCC (ex_bb, 1);
4572 if (flow_bb_inside_loop_p (loop, exit->dest))
4573 return false;
4575 desc = niter_for_exit (data, exit);
4576 if (!desc)
4577 return false;
4579 /* Determine whether we can use the variable to test the exit condition.
4580 This is the case iff the period of the induction variable is greater
4581 than the number of iterations for which the exit condition is true. */
4582 period = iv_period (cand->iv);
4584 /* If the number of iterations is constant, compare against it directly. */
4585 if (TREE_CODE (desc->niter) == INTEGER_CST)
4587 /* See cand_value_at. */
4588 if (stmt_after_increment (loop, cand, use->stmt))
4590 if (!tree_int_cst_lt (desc->niter, period))
4591 return false;
4593 else
4595 if (tree_int_cst_lt (period, desc->niter))
4596 return false;
4600 /* If not, and if this is the only possible exit of the loop, see whether
4601 we can get a conservative estimate on the number of iterations of the
4602 entire loop and compare against that instead. */
4603 else
4605 double_int period_value, max_niter;
4607 max_niter = desc->max;
4608 if (stmt_after_increment (loop, cand, use->stmt))
4609 max_niter += double_int_one;
4610 period_value = tree_to_double_int (period);
4611 if (max_niter.ugt (period_value))
4613 /* See if we can take advantage of inferred loop bound information. */
4614 if (data->loop_single_exit_p)
4616 if (!max_loop_iterations (loop, &max_niter))
4617 return false;
4618 /* The loop bound is already adjusted by adding 1. */
4619 if (max_niter.ugt (period_value))
4620 return false;
4622 else
4623 return false;
4627 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4629 *bound = aff_combination_to_tree (&bnd);
4630 *comp = iv_elimination_compare (data, use);
4632 /* It is unlikely that computing the number of iterations using division
4633 would be more profitable than keeping the original induction variable. */
4634 if (expression_expensive_p (*bound))
4635 return false;
4637 /* Sometimes, it is possible to handle the situation that the number of
4638 iterations may be zero unless additional assumptions hold, by using <
4639 instead of != in the exit condition.
4641 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4642 base the exit condition on it. However, that is often too
4643 expensive. */
4644 if (!integer_zerop (desc->may_be_zero))
4645 return iv_elimination_compare_lt (data, cand, comp, desc);
4647 return true;
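/* Example of the period test above (illustrative): an unsigned char
   candidate with step 1 has period 255.  If the exit condition can be
   true for at most 200 iterations, the candidate cannot wrap back onto
   an earlier value before the exit, so elimination is allowed; with a
   bound that may reach 300 > 255, it is refused.  */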
4650 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4651 be copied, if it is used in the loop body and DATA->body_includes_call. */
4653 static int
4654 parm_decl_cost (struct ivopts_data *data, tree bound)
4656 tree sbound = bound;
4657 STRIP_NOPS (sbound);
4659 if (TREE_CODE (sbound) == SSA_NAME
4660 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4661 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4662 && data->body_includes_call)
4663 return COSTS_N_INSNS (1);
4665 return 0;
4668 /* Determines cost of basing replacement of USE on CAND in a condition. */
4670 static bool
4671 determine_use_iv_cost_condition (struct ivopts_data *data,
4672 struct iv_use *use, struct iv_cand *cand)
4674 tree bound = NULL_TREE;
4675 struct iv *cmp_iv;
4676 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4677 comp_cost elim_cost, express_cost, cost, bound_cost;
4678 bool ok;
4679 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4680 tree *control_var, *bound_cst;
4681 enum tree_code comp = ERROR_MARK;
4683 /* Only consider real candidates. */
4684 if (!cand->iv)
4686 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4687 ERROR_MARK, -1);
4688 return false;
4691 /* Try iv elimination. */
4692 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4694 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4695 if (elim_cost.cost == 0)
4696 elim_cost.cost = parm_decl_cost (data, bound);
4697 else if (TREE_CODE (bound) == INTEGER_CST)
4698 elim_cost.cost = 0;
4699 /* If we replace a loop condition 'i < n' with 'p < base + n',
4700 depends_on_elim will have 'base' and 'n' set, which implies
4701 that both 'base' and 'n' will be live during the loop. More likely,
4702 'base + n' will be loop invariant, resulting in only one live value
4703 during the loop. So in that case we clear depends_on_elim and set
4704 elim_inv_expr_id instead. */
4705 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4707 elim_inv_expr_id = get_expr_id (data, bound);
4708 bitmap_clear (depends_on_elim);
4710 /* The bound is a loop invariant, so it will be only computed
4711 once. */
4712 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4714 else
4715 elim_cost = infinite_cost;
4717 /* Try expressing the original giv. If it is compared with an invariant,
4718 note that we cannot get rid of it. */
4719 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4720 NULL, &cmp_iv);
4721 gcc_assert (ok);
4723 /* When the condition is a comparison of the candidate IV against
4724 zero, prefer this IV.
4726 TODO: The constant that we're subtracting from the cost should
4727 be target-dependent. This information should be added to the
4728 target costs for each backend. */
4729 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4730 && integer_zerop (*bound_cst)
4731 && (operand_equal_p (*control_var, cand->var_after, 0)
4732 || operand_equal_p (*control_var, cand->var_before, 0)))
4733 elim_cost.cost -= 1;
4735 express_cost = get_computation_cost (data, use, cand, false,
4736 &depends_on_express, NULL,
4737 &express_inv_expr_id);
4738 fd_ivopts_data = data;
4739 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4741 /* Count the cost of the original bound as well. */
4742 bound_cost = force_var_cost (data, *bound_cst, NULL);
4743 if (bound_cost.cost == 0)
4744 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4745 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4746 bound_cost.cost = 0;
4747 express_cost.cost += bound_cost.cost;
4749 /* Choose the better approach, preferring the eliminated IV. */
4750 if (compare_costs (elim_cost, express_cost) <= 0)
4752 cost = elim_cost;
4753 depends_on = depends_on_elim;
4754 depends_on_elim = NULL;
4755 inv_expr_id = elim_inv_expr_id;
4757 else
4759 cost = express_cost;
4760 depends_on = depends_on_express;
4761 depends_on_express = NULL;
4762 bound = NULL_TREE;
4763 comp = ERROR_MARK;
4764 inv_expr_id = express_inv_expr_id;
4767 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4769 if (depends_on_elim)
4770 BITMAP_FREE (depends_on_elim);
4771 if (depends_on_express)
4772 BITMAP_FREE (depends_on_express);
4774 return !infinite_cost_p (cost);
4777 /* Determines cost of basing replacement of USE on CAND. Returns false
4778 if USE cannot be based on CAND. */
4780 static bool
4781 determine_use_iv_cost (struct ivopts_data *data,
4782 struct iv_use *use, struct iv_cand *cand)
4784 switch (use->type)
4786 case USE_NONLINEAR_EXPR:
4787 return determine_use_iv_cost_generic (data, use, cand);
4789 case USE_ADDRESS:
4790 return determine_use_iv_cost_address (data, use, cand);
4792 case USE_COMPARE:
4793 return determine_use_iv_cost_condition (data, use, cand);
4795 default:
4796 gcc_unreachable ();
4800 /* Return true if get_computation_cost indicates that autoincrement is
4801 a possibility for the pair of USE and CAND, false otherwise. */
4803 static bool
4804 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4805 struct iv_cand *cand)
4807 bitmap depends_on;
4808 bool can_autoinc;
4809 comp_cost cost;
4811 if (use->type != USE_ADDRESS)
4812 return false;
4814 cost = get_computation_cost (data, use, cand, true, &depends_on,
4815 &can_autoinc, NULL);
4817 BITMAP_FREE (depends_on);
4819 return !infinite_cost_p (cost) && can_autoinc;
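/* For example, on a target with post-increment addressing (e.g. ARM's
   "ldr r0, [r1], #4"), a load *p placed next to the update p += 4 can
   fold the increment into the memory access; a true CAN_AUTOINC from
   get_computation_cost reports exactly this opportunity.  */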
4822 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4823 use that allows autoincrement, and set their AINC_USE if possible. */
4825 static void
4826 set_autoinc_for_original_candidates (struct ivopts_data *data)
4828 unsigned i, j;
4830 for (i = 0; i < n_iv_cands (data); i++)
4832 struct iv_cand *cand = iv_cand (data, i);
4833 struct iv_use *closest = NULL;
4834 if (cand->pos != IP_ORIGINAL)
4835 continue;
4836 for (j = 0; j < n_iv_uses (data); j++)
4838 struct iv_use *use = iv_use (data, j);
4839 unsigned uid = gimple_uid (use->stmt);
4840 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4841 || uid > gimple_uid (cand->incremented_at))
4842 continue;
4843 if (closest == NULL || uid > gimple_uid (closest->stmt))
4844 closest = use;
4846 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4847 continue;
4848 cand->ainc_use = closest;
4852 /* Finds the candidates for the induction variables. */
4854 static void
4855 find_iv_candidates (struct ivopts_data *data)
4857 /* Add commonly used ivs. */
4858 add_standard_iv_candidates (data);
4860 /* Add old induction variables. */
4861 add_old_ivs_candidates (data);
4863 /* Add induction variables derived from uses. */
4864 add_derived_ivs_candidates (data);
4866 set_autoinc_for_original_candidates (data);
4868 /* Record the important candidates. */
4869 record_important_candidates (data);
4872 /* Determines costs of basing the use of the iv on an iv candidate. */
4874 static void
4875 determine_use_iv_costs (struct ivopts_data *data)
4877 unsigned i, j;
4878 struct iv_use *use;
4879 struct iv_cand *cand;
4880 bitmap to_clear = BITMAP_ALLOC (NULL);
4882 alloc_use_cost_map (data);
4884 for (i = 0; i < n_iv_uses (data); i++)
4886 use = iv_use (data, i);
4888 if (data->consider_all_candidates)
4890 for (j = 0; j < n_iv_cands (data); j++)
4892 cand = iv_cand (data, j);
4893 determine_use_iv_cost (data, use, cand);
4896 else
4898 bitmap_iterator bi;
4900 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4902 cand = iv_cand (data, j);
4903 if (!determine_use_iv_cost (data, use, cand))
4904 bitmap_set_bit (to_clear, j);
4907 /* Remove the candidates for which the cost is infinite from
4908 the list of related candidates. */
4909 bitmap_and_compl_into (use->related_cands, to_clear);
4910 bitmap_clear (to_clear);
4914 BITMAP_FREE (to_clear);
4916 if (dump_file && (dump_flags & TDF_DETAILS))
4918 fprintf (dump_file, "Use-candidate costs:\n");
4920 for (i = 0; i < n_iv_uses (data); i++)
4922 use = iv_use (data, i);
4924 fprintf (dump_file, "Use %d:\n", i);
4925 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4926 for (j = 0; j < use->n_map_members; j++)
4928 if (!use->cost_map[j].cand
4929 || infinite_cost_p (use->cost_map[j].cost))
4930 continue;
4932 fprintf (dump_file, " %d\t%d\t%d\t",
4933 use->cost_map[j].cand->id,
4934 use->cost_map[j].cost.cost,
4935 use->cost_map[j].cost.complexity);
4936 if (use->cost_map[j].depends_on)
4937 bitmap_print (dump_file,
4938 use->cost_map[j].depends_on, "","");
4939 if (use->cost_map[j].inv_expr_id != -1)
4940 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4941 fprintf (dump_file, "\n");
4944 fprintf (dump_file, "\n");
4946 fprintf (dump_file, "\n");
4950 /* Determines cost of the candidate CAND. */
4952 static void
4953 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4955 comp_cost cost_base;
4956 unsigned cost, cost_step;
4957 tree base;
4959 if (!cand->iv)
4961 cand->cost = 0;
4962 return;
4965 /* There are two costs associated with the candidate -- its increment
4966 and its initialization. The second is almost negligible for any loop
4967 that rolls enough, so we give it only a small weight. */
4969 base = cand->iv->base;
4970 cost_base = force_var_cost (data, base, NULL);
4971 /* It will be exceptional that the iv register happens to be initialized with
4972 the proper value at no cost. In general, there will at least be a regcopy
4973 or a const set. */
4974 if (cost_base.cost == 0)
4975 cost_base.cost = COSTS_N_INSNS (1);
4976 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
4978 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4980 /* Prefer the original ivs unless we may gain something by replacing them.
4981 The reason is to make debugging simpler; so this is not relevant for
4982 artificial ivs created by other optimization passes. */
4983 if (cand->pos != IP_ORIGINAL
4984 || !SSA_NAME_VAR (cand->var_before)
4985 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4986 cost++;
4988 /* Prefer not to insert statements into the latch unless there are some
4989 already (so that we do not create unnecessary jumps). */
4990 if (cand->pos == IP_END
4991 && empty_block_p (ip_end_pos (data->current_loop)))
4992 cost++;
4994 cand->cost = cost;
4995 cand->cost_step = cost_step;
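/* Shape of the candidate cost computed above, as a sketch on plain
   integers (AVG_NITER >= 1 assumed; amortizing the setup cost over the
   average iteration count mirrors how adjust_setup_cost is used
   elsewhere in this file):  */
static unsigned
example_cand_cost (unsigned cost_step, unsigned cost_base,
                   unsigned avg_niter, int is_original_iv,
                   int fills_empty_latch)
{
  unsigned cost = cost_step + cost_base / avg_niter;
  if (!is_original_iv)
    cost++;   /* prefer preserving user-visible ivs */
  if (fills_empty_latch)
    cost++;   /* avoid making an empty latch block nonempty */
  return cost;
}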
4998 /* Determines costs of computation of the candidates. */
5000 static void
5001 determine_iv_costs (struct ivopts_data *data)
5003 unsigned i;
5005 if (dump_file && (dump_flags & TDF_DETAILS))
5007 fprintf (dump_file, "Candidate costs:\n");
5008 fprintf (dump_file, " cand\tcost\n");
5011 for (i = 0; i < n_iv_cands (data); i++)
5013 struct iv_cand *cand = iv_cand (data, i);
5015 determine_iv_cost (data, cand);
5017 if (dump_file && (dump_flags & TDF_DETAILS))
5018 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5021 if (dump_file && (dump_flags & TDF_DETAILS))
5022 fprintf (dump_file, "\n");
5025 /* Calculates cost for having SIZE induction variables. */
5027 static unsigned
5028 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5030 /* We add size to the cost, so that we prefer eliminating ivs
5031 if possible. */
5032 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5033 data->body_includes_call);
5036 /* For each size of the induction variable set determine the penalty. */
5038 static void
5039 determine_set_costs (struct ivopts_data *data)
5041 unsigned j, n;
5042 gimple phi;
5043 gimple_stmt_iterator psi;
5044 tree op;
5045 struct loop *loop = data->current_loop;
5046 bitmap_iterator bi;
5048 if (dump_file && (dump_flags & TDF_DETAILS))
5050 fprintf (dump_file, "Global costs:\n");
5051 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5052 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5053 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5054 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5057 n = 0;
5058 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5060 phi = gsi_stmt (psi);
5061 op = PHI_RESULT (phi);
5063 if (virtual_operand_p (op))
5064 continue;
5066 if (get_iv (data, op))
5067 continue;
5069 n++;
5072 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5074 struct version_info *info = ver_info (data, j);
5076 if (info->inv_id && info->has_nonlin_use)
5077 n++;
5080 data->regs_used = n;
5081 if (dump_file && (dump_flags & TDF_DETAILS))
5082 fprintf (dump_file, " regs_used %d\n", n);
5084 if (dump_file && (dump_flags & TDF_DETAILS))
5086 fprintf (dump_file, " cost for size:\n");
5087 fprintf (dump_file, " ivs\tcost\n");
5088 for (j = 0; j <= 2 * target_avail_regs; j++)
5089 fprintf (dump_file, " %d\t%d\n", j,
5090 ivopts_global_cost_for_size (data, j));
5091 fprintf (dump_file, "\n");
5095 /* Returns true if A is a cheaper cost pair than B. */
5097 static bool
5098 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5100 int cmp;
5102 if (!a)
5103 return false;
5105 if (!b)
5106 return true;
5108 cmp = compare_costs (a->cost, b->cost);
5109 if (cmp < 0)
5110 return true;
5112 if (cmp > 0)
5113 return false;
5115 /* In case the costs are the same, prefer the cheaper candidate. */
5116 if (a->cand->cost < b->cand->cost)
5117 return true;
5119 return false;
5123 /* Returns the candidate by which USE is expressed in IVS. */
5125 static struct cost_pair *
5126 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5128 return ivs->cand_for_use[use->id];
5131 /* Computes the cost field of the IVS structure. */
5133 static void
5134 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5136 comp_cost cost = ivs->cand_use_cost;
5138 cost.cost += ivs->cand_cost;
5140 cost.cost += ivopts_global_cost_for_size (data,
5141 ivs->n_regs + ivs->num_used_inv_expr);
5143 ivs->cost = cost;
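/* The total cost of an assignment, as recomputed above, is

     cand_use_cost                                    (sum over uses)
   + cand_cost                                        (sum over candidates)
   + ivopts_global_cost_for_size (n_regs + num_used_inv_expr)

   so every live iv, invariant and invariant expression feeds into the
   register-pressure term.  */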
5146 /* Remove the invariants in set INVS from set IVS. */
5148 static void
5149 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5151 bitmap_iterator bi;
5152 unsigned iid;
5154 if (!invs)
5155 return;
5157 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5159 ivs->n_invariant_uses[iid]--;
5160 if (ivs->n_invariant_uses[iid] == 0)
5161 ivs->n_regs--;
5165 /* Set USE not to be expressed by any candidate in IVS. */
5167 static void
5168 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5169 struct iv_use *use)
5171 unsigned uid = use->id, cid;
5172 struct cost_pair *cp;
5174 cp = ivs->cand_for_use[uid];
5175 if (!cp)
5176 return;
5177 cid = cp->cand->id;
5179 ivs->bad_uses++;
5180 ivs->cand_for_use[uid] = NULL;
5181 ivs->n_cand_uses[cid]--;
5183 if (ivs->n_cand_uses[cid] == 0)
5185 bitmap_clear_bit (ivs->cands, cid);
5186 /* Do not count the pseudocandidates. */
5187 if (cp->cand->iv)
5188 ivs->n_regs--;
5189 ivs->n_cands--;
5190 ivs->cand_cost -= cp->cand->cost;
5192 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5195 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5197 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5199 if (cp->inv_expr_id != -1)
5201 ivs->used_inv_expr[cp->inv_expr_id]--;
5202 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5203 ivs->num_used_inv_expr--;
5205 iv_ca_recount_cost (data, ivs);
5208 /* Add invariants in set INVS to set IVS. */
5210 static void
5211 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5213 bitmap_iterator bi;
5214 unsigned iid;
5216 if (!invs)
5217 return;
5219 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5221 ivs->n_invariant_uses[iid]++;
5222 if (ivs->n_invariant_uses[iid] == 1)
5223 ivs->n_regs++;
5227 /* Set cost pair for USE in set IVS to CP. */
5229 static void
5230 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5231 struct iv_use *use, struct cost_pair *cp)
5233 unsigned uid = use->id, cid;
5235 if (ivs->cand_for_use[uid] == cp)
5236 return;
5238 if (ivs->cand_for_use[uid])
5239 iv_ca_set_no_cp (data, ivs, use);
5241 if (cp)
5243 cid = cp->cand->id;
5245 ivs->bad_uses--;
5246 ivs->cand_for_use[uid] = cp;
5247 ivs->n_cand_uses[cid]++;
5248 if (ivs->n_cand_uses[cid] == 1)
5250 bitmap_set_bit (ivs->cands, cid);
5251 /* Do not count the pseudocandidates. */
5252 if (cp->cand->iv)
5253 ivs->n_regs++;
5254 ivs->n_cands++;
5255 ivs->cand_cost += cp->cand->cost;
5257 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5260 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5261 iv_ca_set_add_invariants (ivs, cp->depends_on);
5263 if (cp->inv_expr_id != -1)
5265 ivs->used_inv_expr[cp->inv_expr_id]++;
5266 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5267 ivs->num_used_inv_expr++;
5269 iv_ca_recount_cost (data, ivs);
5273 /* Extend set IVS by expressing USE by some of the candidates in it
5274 if possible. All important candidates will be considered
5275 if IMPORTANT_CANDIDATES is true. */
5277 static void
5278 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5279 struct iv_use *use, bool important_candidates)
5281 struct cost_pair *best_cp = NULL, *cp;
5282 bitmap_iterator bi;
5283 bitmap cands;
5284 unsigned i;
5286 gcc_assert (ivs->upto >= use->id);
5288 if (ivs->upto == use->id)
5290 ivs->upto++;
5291 ivs->bad_uses++;
5294 cands = (important_candidates ? data->important_candidates : ivs->cands);
5295 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5297 struct iv_cand *cand = iv_cand (data, i);
5299 cp = get_use_iv_cost (data, use, cand);
5301 if (cheaper_cost_pair (cp, best_cp))
5302 best_cp = cp;
5305 iv_ca_set_cp (data, ivs, use, best_cp);
5308 /* Get cost for assignment IVS. */
5310 static comp_cost
5311 iv_ca_cost (struct iv_ca *ivs)
5313 /* This was a conditional expression but it triggered a bug in
5314 Sun C 5.5. */
5315 if (ivs->bad_uses)
5316 return infinite_cost;
5317 else
5318 return ivs->cost;
5321 /* Returns true if all dependences of CP are among invariants in IVS. */
5323 static bool
5324 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5326 unsigned i;
5327 bitmap_iterator bi;
5329 if (!cp->depends_on)
5330 return true;
5332 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5334 if (ivs->n_invariant_uses[i] == 0)
5335 return false;
5338 return true;
5341 /* Creates a change record expressing USE by NEW_CP instead of OLD_CP and chains
5342 it before NEXT_CHANGE. */
5344 static struct iv_ca_delta *
5345 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5346 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5348 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5350 change->use = use;
5351 change->old_cp = old_cp;
5352 change->new_cp = new_cp;
5353 change->next_change = next_change;
5355 return change;
5358 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5359 are rewritten. */
5361 static struct iv_ca_delta *
5362 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5364 struct iv_ca_delta *last;
5366 if (!l2)
5367 return l1;
5369 if (!l1)
5370 return l2;
5372 for (last = l1; last->next_change; last = last->next_change)
5373 continue;
5374 last->next_change = l2;
5376 return l1;
5379 /* Reverse the list of changes DELTA, forming the inverse to it. */
5381 static struct iv_ca_delta *
5382 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5384 struct iv_ca_delta *act, *next, *prev = NULL;
5385 struct cost_pair *tmp;
5387 for (act = delta; act; act = next)
5389 next = act->next_change;
5390 act->next_change = prev;
5391 prev = act;
5393 tmp = act->old_cp;
5394 act->old_cp = act->new_cp;
5395 act->new_cp = tmp;
5398 return prev;
5401 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5402 reverted instead. */
5404 static void
5405 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5406 struct iv_ca_delta *delta, bool forward)
5408 struct cost_pair *from, *to;
5409 struct iv_ca_delta *act;
5411 if (!forward)
5412 delta = iv_ca_delta_reverse (delta);
5414 for (act = delta; act; act = act->next_change)
5416 from = act->old_cp;
5417 to = act->new_cp;
5418 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5419 iv_ca_set_cp (data, ivs, act->use, to);
5422 if (!forward)
5423 iv_ca_delta_reverse (delta);
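/* The deltas give the set operations a cheap trial/rollback protocol;
   the search routines below (iv_ca_extend, iv_ca_narrow, iv_ca_prune)
   all follow the pattern:

     iv_ca_delta_commit (data, ivs, delta, true);    // apply tentatively
     cost = iv_ca_cost (ivs);                        // measure
     iv_ca_delta_commit (data, ivs, delta, false);   // undo
*/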
5426 /* Returns true if CAND is used in IVS. */
5428 static bool
5429 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5431 return ivs->n_cand_uses[cand->id] > 0;
5434 /* Returns number of induction variable candidates in the set IVS. */
5436 static unsigned
5437 iv_ca_n_cands (struct iv_ca *ivs)
5439 return ivs->n_cands;
5442 /* Free the list of changes DELTA. */
5444 static void
5445 iv_ca_delta_free (struct iv_ca_delta **delta)
5447 struct iv_ca_delta *act, *next;
5449 for (act = *delta; act; act = next)
5451 next = act->next_change;
5452 free (act);
5455 *delta = NULL;
5458 /* Allocates a new iv candidate assignment. */
5460 static struct iv_ca *
5461 iv_ca_new (struct ivopts_data *data)
5463 struct iv_ca *nw = XNEW (struct iv_ca);
5465 nw->upto = 0;
5466 nw->bad_uses = 0;
5467 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5468 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5469 nw->cands = BITMAP_ALLOC (NULL);
5470 nw->n_cands = 0;
5471 nw->n_regs = 0;
5472 nw->cand_use_cost = no_cost;
5473 nw->cand_cost = 0;
5474 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5475 nw->cost = no_cost;
5476 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5477 nw->num_used_inv_expr = 0;
5479 return nw;
5482 /* Free memory occupied by the set IVS. */
5484 static void
5485 iv_ca_free (struct iv_ca **ivs)
5487 free ((*ivs)->cand_for_use);
5488 free ((*ivs)->n_cand_uses);
5489 BITMAP_FREE ((*ivs)->cands);
5490 free ((*ivs)->n_invariant_uses);
5491 free ((*ivs)->used_inv_expr);
5492 free (*ivs);
5493 *ivs = NULL;
5496 /* Dumps IVS to FILE. */
5498 static void
5499 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5501 const char *pref = " invariants ";
5502 unsigned i;
5503 comp_cost cost = iv_ca_cost (ivs);
5505 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5506 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5507 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5508 bitmap_print (file, ivs->cands, " candidates: ","\n");
5510 for (i = 0; i < ivs->upto; i++)
5512 struct iv_use *use = iv_use (data, i);
5513 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5514 if (cp)
5515 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5516 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5517 else
5518 fprintf (file, " use:%d --> ??\n", use->id);
5521 for (i = 1; i <= data->max_inv_id; i++)
5522 if (ivs->n_invariant_uses[i])
5524 fprintf (file, "%s%d", pref, i);
5525 pref = ", ";
5527 fprintf (file, "\n\n");
5530 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5531 new set, and store differences in DELTA. Number of induction variables
5532 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true,
5533 the function will try to find a solution with minimal iv candidates. */
5535 static comp_cost
5536 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5537 struct iv_cand *cand, struct iv_ca_delta **delta,
5538 unsigned *n_ivs, bool min_ncand)
5540 unsigned i;
5541 comp_cost cost;
5542 struct iv_use *use;
5543 struct cost_pair *old_cp, *new_cp;
5545 *delta = NULL;
5546 for (i = 0; i < ivs->upto; i++)
5548 use = iv_use (data, i);
5549 old_cp = iv_ca_cand_for_use (ivs, use);
5551 if (old_cp
5552 && old_cp->cand == cand)
5553 continue;
5555 new_cp = get_use_iv_cost (data, use, cand);
5556 if (!new_cp)
5557 continue;
5559 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5560 continue;
5562 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5563 continue;
5565 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5568 iv_ca_delta_commit (data, ivs, *delta, true);
5569 cost = iv_ca_cost (ivs);
5570 if (n_ivs)
5571 *n_ivs = iv_ca_n_cands (ivs);
5572 iv_ca_delta_commit (data, ivs, *delta, false);
5574 return cost;
5577 /* Try narrowing set IVS by removing CAND. Return the cost of
5578 the new set and store the differences in DELTA. */
5580 static comp_cost
5581 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5582 struct iv_cand *cand, struct iv_ca_delta **delta)
5584 unsigned i, ci;
5585 struct iv_use *use;
5586 struct cost_pair *old_cp, *new_cp, *cp;
5587 bitmap_iterator bi;
5588 struct iv_cand *cnd;
5589 comp_cost cost;
5591 *delta = NULL;
5592 for (i = 0; i < n_iv_uses (data); i++)
5594 use = iv_use (data, i);
5596 old_cp = iv_ca_cand_for_use (ivs, use);
5597 if (old_cp->cand != cand)
5598 continue;
5600 new_cp = NULL;
5602 if (data->consider_all_candidates)
5604 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5606 if (ci == cand->id)
5607 continue;
5609 cnd = iv_cand (data, ci);
5611 cp = get_use_iv_cost (data, use, cnd);
5612 if (!cp)
5613 continue;
5615 if (!iv_ca_has_deps (ivs, cp))
5616 continue;
5618 if (!cheaper_cost_pair (cp, new_cp))
5619 continue;
5621 new_cp = cp;
5624 else
5626 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5628 if (ci == cand->id)
5629 continue;
5631 cnd = iv_cand (data, ci);
5633 cp = get_use_iv_cost (data, use, cnd);
5634 if (!cp)
5635 continue;
5636 if (!iv_ca_has_deps (ivs, cp))
5637 continue;
5639 if (!cheaper_cost_pair (cp, new_cp))
5640 continue;
5642 new_cp = cp;
5646 if (!new_cp)
5648 iv_ca_delta_free (delta);
5649 return infinite_cost;
5652 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5655 iv_ca_delta_commit (data, ivs, *delta, true);
5656 cost = iv_ca_cost (ivs);
5657 iv_ca_delta_commit (data, ivs, *delta, false);
5659 return cost;
5662 /* Try optimizing the set of candidates IVS by removing candidates other
5663 than EXCEPT_CAND from it. Return cost of the new set, and store
5664 differences in DELTA. */
5666 static comp_cost
5667 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5668 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5670 bitmap_iterator bi;
5671 struct iv_ca_delta *act_delta, *best_delta;
5672 unsigned i;
5673 comp_cost best_cost, acost;
5674 struct iv_cand *cand;
5676 best_delta = NULL;
5677 best_cost = iv_ca_cost (ivs);
5679 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5681 cand = iv_cand (data, i);
5683 if (cand == except_cand)
5684 continue;
5686 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5688 if (compare_costs (acost, best_cost) < 0)
5690 best_cost = acost;
5691 iv_ca_delta_free (&best_delta);
5692 best_delta = act_delta;
5694 else
5695 iv_ca_delta_free (&act_delta);
5698 if (!best_delta)
5700 *delta = NULL;
5701 return best_cost;
5704 /* Recurse to possibly remove other unnecessary ivs. */
5705 iv_ca_delta_commit (data, ivs, best_delta, true);
5706 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5707 iv_ca_delta_commit (data, ivs, best_delta, false);
5708 *delta = iv_ca_delta_join (best_delta, *delta);
5709 return best_cost;
5712 /* Tries to extend the set IVS in the best possible way in order
5713 to express the USE. If ORIGINALP is true, prefer candidates from
5714 the original set of IVs, otherwise favor important candidates not
5715 based on any memory object. */
5717 static bool
5718 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5719 struct iv_use *use, bool originalp)
5721 comp_cost best_cost, act_cost;
5722 unsigned i;
5723 bitmap_iterator bi;
5724 struct iv_cand *cand;
5725 struct iv_ca_delta *best_delta = NULL, *act_delta;
5726 struct cost_pair *cp;
5728 iv_ca_add_use (data, ivs, use, false);
5729 best_cost = iv_ca_cost (ivs);
5731 cp = iv_ca_cand_for_use (ivs, use);
5732 if (!cp)
5734 ivs->upto--;
5735 ivs->bad_uses--;
5736 iv_ca_add_use (data, ivs, use, true);
5737 best_cost = iv_ca_cost (ivs);
5738 cp = iv_ca_cand_for_use (ivs, use);
5740 if (cp)
5742 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5743 iv_ca_set_no_cp (data, ivs, use);
5746 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5747 first try important candidates not based on any memory object. Only if
5748 this fails, try the specific ones. Rationale -- in loops with many
5749 variables the best choice often is to use just one generic biv. If we
5750 added here many ivs specific to the uses, the optimization algorithm later
5751 would be likely to get stuck in a local minimum, thus causing us to create
5752 too many ivs. The approach from few ivs to more seems more likely to be
5753 successful -- starting from few ivs, replacing an expensive use by a
5754 specific iv should always be a win. */
5755 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5757 cand = iv_cand (data, i);
5759 if (originalp && cand->pos != IP_ORIGINAL)
5760 continue;
5762 if (!originalp && cand->iv->base_object != NULL_TREE)
5763 continue;
5765 if (iv_ca_cand_used_p (ivs, cand))
5766 continue;
5768 cp = get_use_iv_cost (data, use, cand);
5769 if (!cp)
5770 continue;
5772 iv_ca_set_cp (data, ivs, use, cp);
5773 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5774 true);
5775 iv_ca_set_no_cp (data, ivs, use);
5776 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5778 if (compare_costs (act_cost, best_cost) < 0)
5780 best_cost = act_cost;
5782 iv_ca_delta_free (&best_delta);
5783 best_delta = act_delta;
5785 else
5786 iv_ca_delta_free (&act_delta);
5789 if (infinite_cost_p (best_cost))
5791 for (i = 0; i < use->n_map_members; i++)
5793 cp = use->cost_map + i;
5794 cand = cp->cand;
5795 if (!cand)
5796 continue;
5798 /* Already tried this. */
5799 if (cand->important)
5801 if (originalp && cand->pos == IP_ORIGINAL)
5802 continue;
5803 if (!originalp && cand->iv->base_object == NULL_TREE)
5804 continue;
5807 if (iv_ca_cand_used_p (ivs, cand))
5808 continue;
5810 act_delta = NULL;
5811 iv_ca_set_cp (data, ivs, use, cp);
5812 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5813 iv_ca_set_no_cp (data, ivs, use);
5814 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5815 cp, act_delta);
5817 if (compare_costs (act_cost, best_cost) < 0)
5819 best_cost = act_cost;
5821 if (best_delta)
5822 iv_ca_delta_free (&best_delta);
5823 best_delta = act_delta;
5825 else
5826 iv_ca_delta_free (&act_delta);
5830 iv_ca_delta_commit (data, ivs, best_delta, true);
5831 iv_ca_delta_free (&best_delta);
5833 return !infinite_cost_p (best_cost);
5836 /* Finds an initial assignment of candidates to uses. */
5838 static struct iv_ca *
5839 get_initial_solution (struct ivopts_data *data, bool originalp)
5841 struct iv_ca *ivs = iv_ca_new (data);
5842 unsigned i;
5844 for (i = 0; i < n_iv_uses (data); i++)
5845 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5847 iv_ca_free (&ivs);
5848 return NULL;
5851 return ivs;
5854 /* Tries to improve set of induction variables IVS. */
5856 static bool
5857 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5859 unsigned i, n_ivs;
5860 comp_cost acost, best_cost = iv_ca_cost (ivs);
5861 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5862 struct iv_cand *cand;
5864 /* Try extending the set of induction variables by one. */
5865 for (i = 0; i < n_iv_cands (data); i++)
5867 cand = iv_cand (data, i);
5869 if (iv_ca_cand_used_p (ivs, cand))
5870 continue;
5872 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5873 if (!act_delta)
5874 continue;
5876 /* If we successfully added the candidate and the set is small enough,
5877 try optimizing it by removing other candidates. */
5878 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5880 iv_ca_delta_commit (data, ivs, act_delta, true);
5881 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5882 iv_ca_delta_commit (data, ivs, act_delta, false);
5883 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5886 if (compare_costs (acost, best_cost) < 0)
5888 best_cost = acost;
5889 iv_ca_delta_free (&best_delta);
5890 best_delta = act_delta;
5892 else
5893 iv_ca_delta_free (&act_delta);
5896 if (!best_delta)
5898 /* Try removing the candidates from the set instead. */
5899 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5901 /* Nothing more we can do. */
5902 if (!best_delta)
5903 return false;
5906 iv_ca_delta_commit (data, ivs, best_delta, true);
5907 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5908 iv_ca_delta_free (&best_delta);
5909 return true;
5912 /* Attempts to find the optimal set of induction variables. We use a simple
5913 greedy heuristic -- we try to replace at most one candidate in the selected
5914 solution and remove the unused ivs while this improves the cost. */
5916 static struct iv_ca *
5917 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5919 struct iv_ca *set;
5921 /* Get the initial solution. */
5922 set = get_initial_solution (data, originalp);
5923 if (!set)
5925 if (dump_file && (dump_flags & TDF_DETAILS))
5926 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5927 return NULL;
5930 if (dump_file && (dump_flags & TDF_DETAILS))
5932 fprintf (dump_file, "Initial set of candidates:\n");
5933 iv_ca_dump (data, dump_file, set);
5936 while (try_improve_iv_set (data, set))
5938 if (dump_file && (dump_flags & TDF_DETAILS))
5940 fprintf (dump_file, "Improved to:\n");
5941 iv_ca_dump (data, dump_file, set);
5945 return set;
5948 static struct iv_ca *
5949 find_optimal_iv_set (struct ivopts_data *data)
5951 unsigned i;
5952 struct iv_ca *set, *origset;
5953 struct iv_use *use;
5954 comp_cost cost, origcost;
5956 /* Determine the cost based on a strategy that starts with original IVs,
5957 and try again using a strategy that prefers candidates not based
5958 on any IVs. */
5959 origset = find_optimal_iv_set_1 (data, true);
5960 set = find_optimal_iv_set_1 (data, false);
5962 if (!origset && !set)
5963 return NULL;
5965 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5966 cost = set ? iv_ca_cost (set) : infinite_cost;
5968 if (dump_file && (dump_flags & TDF_DETAILS))
5970 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5971 origcost.cost, origcost.complexity);
5972 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5973 cost.cost, cost.complexity);
5976 /* Choose the one with the best cost. */
5977 if (compare_costs (origcost, cost) <= 0)
5979 if (set)
5980 iv_ca_free (&set);
5981 set = origset;
5983 else if (origset)
5984 iv_ca_free (&origset);
5986 for (i = 0; i < n_iv_uses (data); i++)
5988 use = iv_use (data, i);
5989 use->selected = iv_ca_cand_for_use (set, use)->cand;
5992 return set;
5995 /* Creates a new induction variable corresponding to CAND. */
5997 static void
5998 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6000 gimple_stmt_iterator incr_pos;
6001 tree base;
6002 bool after = false;
6004 if (!cand->iv)
6005 return;
6007 switch (cand->pos)
6009 case IP_NORMAL:
6010 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6011 break;
6013 case IP_END:
6014 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6015 after = true;
6016 break;
6018 case IP_AFTER_USE:
6019 after = true;
6020 /* fall through */
6021 case IP_BEFORE_USE:
6022 incr_pos = gsi_for_stmt (cand->incremented_at);
6023 break;
6025 case IP_ORIGINAL:
6026 /* Mark that the iv is preserved. */
6027 name_info (data, cand->var_before)->preserve_biv = true;
6028 name_info (data, cand->var_after)->preserve_biv = true;
6030 /* Rewrite the increment so that it uses var_before directly. */
6031 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6032 return;
6035 gimple_add_tmp_var (cand->var_before);
6037 base = unshare_expr (cand->iv->base);
6039 create_iv (base, unshare_expr (cand->iv->step),
6040 cand->var_before, data->current_loop,
6041 &incr_pos, after, &cand->var_before, &cand->var_after);
6044 /* Creates new induction variables described in SET. */
6046 static void
6047 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6049 unsigned i;
6050 struct iv_cand *cand;
6051 bitmap_iterator bi;
6053 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6055 cand = iv_cand (data, i);
6056 create_new_iv (data, cand);
6059 if (dump_file && (dump_flags & TDF_DETAILS))
6061 fprintf (dump_file, "\nSelected IV set: \n");
6062 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6064 cand = iv_cand (data, i);
6065 dump_cand (dump_file, cand);
6067 fprintf (dump_file, "\n");
6071 /* Rewrites USE (definition of iv used in a nonlinear expression)
6072 using candidate CAND. */
6074 static void
6075 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6076 struct iv_use *use, struct iv_cand *cand)
6078 tree comp;
6079 tree op, tgt;
6080 gimple ass;
6081 gimple_stmt_iterator bsi;
6083 /* An important special case -- if we are asked to express the value of
6084 the original iv by itself, just exit; there is no need to
6085 introduce a new computation (that might also need casting the
6086 variable to unsigned and back). */
6087 if (cand->pos == IP_ORIGINAL
6088 && cand->incremented_at == use->stmt)
6090 enum tree_code stmt_code;
6092 gcc_assert (is_gimple_assign (use->stmt));
6093 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6095 /* Check whether we may leave the computation unchanged.
6096 This is the case only if it does not rely on other
6097 computations in the loop -- otherwise, the computation
6098 we rely upon may be removed in remove_unused_ivs,
6099 thus leading to ICE. */
6100 stmt_code = gimple_assign_rhs_code (use->stmt);
6101 if (stmt_code == PLUS_EXPR
6102 || stmt_code == MINUS_EXPR
6103 || stmt_code == POINTER_PLUS_EXPR)
6105 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6106 op = gimple_assign_rhs2 (use->stmt);
6107 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6108 op = gimple_assign_rhs1 (use->stmt);
6109 else
6110 op = NULL_TREE;
6112 else
6113 op = NULL_TREE;
6115 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6116 return;
6119 comp = get_computation (data->current_loop, use, cand);
6120 gcc_assert (comp != NULL_TREE);
6122 switch (gimple_code (use->stmt))
6124 case GIMPLE_PHI:
6125 tgt = PHI_RESULT (use->stmt);
6127 /* If we should keep the biv, do not replace it. */
6128 if (name_info (data, tgt)->preserve_biv)
6129 return;
6131 bsi = gsi_after_labels (gimple_bb (use->stmt));
6132 break;
6134 case GIMPLE_ASSIGN:
6135 tgt = gimple_assign_lhs (use->stmt);
6136 bsi = gsi_for_stmt (use->stmt);
6137 break;
6139 default:
6140 gcc_unreachable ();
6143 if (!valid_gimple_rhs_p (comp)
6144 || (gimple_code (use->stmt) != GIMPLE_PHI
6145 /* We can't allow re-allocating the stmt as it might be pointed
6146 to still. */
6147 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6148 >= gimple_num_ops (gsi_stmt (bsi)))))
6150 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6151 true, GSI_SAME_STMT);
6152 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6154 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6155 /* As this isn't a plain copy we have to reset alignment
6156 information. */
6157 if (SSA_NAME_PTR_INFO (comp))
6158 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6162 if (gimple_code (use->stmt) == GIMPLE_PHI)
6164 ass = gimple_build_assign (tgt, comp);
6165 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6167 bsi = gsi_for_stmt (use->stmt);
6168 remove_phi_node (&bsi, false);
6170 else
6172 gimple_assign_set_rhs_from_tree (&bsi, comp);
6173 use->stmt = gsi_stmt (bsi);
6177 /* Performs a peephole optimization to reorder the iv update statement with
6178 a mem ref to enable instruction combining in later phases. The mem ref uses
6179 the iv value before the update, so the reordering transformation requires
6180 adjustment of the offset. CAND is the selected IV_CAND.
6182 Example:
6184 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6185 iv2 = iv1 + 1;
6187 if (t < val) (1)
6188 goto L;
6189 goto Head;
6192 directly propagating t over to (1) will introduce an overlapping live range
6193 and thus increase register pressure. This peephole transforms it into:
6196 iv2 = iv1 + 1;
6197 t = MEM_REF (base, iv2, 8, 8);
6198 if (t < val)
6199 goto L;
6200 goto Head;
6201 */
6203 static void
6204 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6206 tree var_after;
6207 gimple iv_update, stmt;
6208 basic_block bb;
6209 gimple_stmt_iterator gsi, gsi_iv;
6211 if (cand->pos != IP_NORMAL)
6212 return;
6214 var_after = cand->var_after;
6215 iv_update = SSA_NAME_DEF_STMT (var_after);
6217 bb = gimple_bb (iv_update);
6218 gsi = gsi_last_nondebug_bb (bb);
6219 stmt = gsi_stmt (gsi);
6221 /* Only handle conditional statements for now. */
6222 if (gimple_code (stmt) != GIMPLE_COND)
6223 return;
6225 gsi_prev_nondebug (&gsi);
6226 stmt = gsi_stmt (gsi);
6227 if (stmt != iv_update)
6228 return;
6230 gsi_prev_nondebug (&gsi);
6231 if (gsi_end_p (gsi))
6232 return;
6234 stmt = gsi_stmt (gsi);
6235 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6236 return;
6238 if (stmt != use->stmt)
6239 return;
6241 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6242 return;
6244 if (dump_file && (dump_flags & TDF_DETAILS))
6246 fprintf (dump_file, "Reordering \n");
6247 print_gimple_stmt (dump_file, iv_update, 0, 0);
6248 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6249 fprintf (dump_file, "\n");
6252 gsi = gsi_for_stmt (use->stmt);
6253 gsi_iv = gsi_for_stmt (iv_update);
6254 gsi_move_before (&gsi_iv, &gsi);
6256 cand->pos = IP_BEFORE_USE;
6257 cand->incremented_at = use->stmt;
6260 /* Rewrites USE (address that is an iv) using candidate CAND. */
6262 static void
6263 rewrite_use_address (struct ivopts_data *data,
6264 struct iv_use *use, struct iv_cand *cand)
6266 aff_tree aff;
6267 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6268 tree base_hint = NULL_TREE;
6269 tree ref, iv;
6270 bool ok;
6272 adjust_iv_update_pos (cand, use);
6273 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6274 gcc_assert (ok);
6275 unshare_aff_combination (&aff);
6277 /* To avoid undefined overflow problems, all IV candidates use unsigned
6278 integer types. The drawback is that this makes it impossible for
6279 create_mem_ref to distinguish an IV that is based on a memory object
6280 from one that represents simply an offset.
6282 To work around this problem, we pass a hint to create_mem_ref that
6283 indicates which variable (if any) in aff is an IV based on a memory
6284 object. Note that we only consider the candidate. If this is not
6285 based on an object, the base of the reference is in some subexpression
6286 of the use -- but these will use pointer types, so they are recognized
6287 by the create_mem_ref heuristics anyway. */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
			reference_alias_ptr_type (*use->op_p),
			iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */
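
/* For example (with illustrative SSA names), an exit test

     if (i_1 < n_2)

   may be rewritten into a comparison of the candidate variable against a
   bound computed in the loop preheader, e.g.

     if (ivtmp_3 != bound_4)

   when such a bound exists; otherwise the original value is simply
   re-expressed in terms of CAND.  */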
static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Replacing exit test: ");
	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
	}
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
				     true, GSI_SAME_STMT);
}
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
      case USE_NONLINEAR_EXPR:
	rewrite_use_nonlinear_expr (data, use, cand);
	break;

      case USE_ADDRESS:
	rewrite_use_address (data, use, cand);
	break;

      case USE_COMPARE:
	rewrite_use_compare (data, use, cand);
	break;

      default:
	gcc_unreachable ();
    }

  update_stmt (use->stmt);
}
/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}
/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	{
	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

	  tree def = info->iv->ssa_name;

	  if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
	    {
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      gimple stmt;
	      int count = 0;

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  /* We just want to determine whether to do nothing
		     (count == 0), to substitute the computed
		     expression into a single use of the SSA DEF by
		     itself (count == 1), or to use a debug temp
		     because the SSA DEF is used multiple times or as
		     part of a larger expression (count > 1).  */
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      if (!count)
		continue;
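
	      /* Among the candidates actually selected for some use, pick
		 the one that expresses DEF best: a matching step weighs
		 heaviest (4), a matching machine mode next (2), and a
		 constant base least (1).  */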
	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;
	      for (i = 0; i < n_iv_uses (data) && i < 64; i++)
		{
		  cand = iv_use (data, i)->selected;
		  if (cand == best_cand)
		    continue;
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
		    ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		       == TYPE_MODE (TREE_TYPE (info->iv->base))
		    ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		    ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      best_cand = cand;
		      best_pref = cand_pref;
		    }
		}

	      if (!best_cand)
		continue;

	      tree comp = get_computation_at (data->current_loop,
					      &dummy_use, best_cand,
					      SSA_NAME_DEF_STMT (def));
	      if (!comp)
		continue;
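
	      /* When DEF appears in several debug binds, or inside a
		 larger expression, bind the computed value to a debug
		 temporary once and point all the debug binds at it
		 instead of duplicating the expression.  */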
	      if (count > 1)
		{
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
		  else
		    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
		  gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
		      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
				     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  htab_empty (data->inv_expr_tab);
  data->inv_expr_id = 0;
}
/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  htab_delete (data->inv_expr_tab);
}
/* Returns true if the loop body BODY, consisting of NUM_NODES basic blocks,
   includes any function calls.  */
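/* (The result is recorded in data->body_includes_call and feeds the
   register-pressure estimate, since calls clobber registers.)  */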
static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);
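
  /* Record whether EXIT (if any) is the only exit of the loop; the
     exit-test elimination logic uses this to decide when the computed
     iteration-count bound can be relied upon.  */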
  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}