gcc/tree-ssa-loop-ivopts.c
/* Induction variable optimizations.
   Copyright (C) 2003-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
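
/* Illustrative sketch (not part of the original sources): given a loop

     for (i = 0; i < n; i++)
       a[i] = 0;

   step 1 finds the address use a[i] and the compare use i < n, step 2
   adds e.g. the candidate p = &a[0], p += sizeof (a[0]), and if step 3
   selects it for both uses, step 4 rewrites the loop along the lines of

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   after which the original biv i is dead and is removed.  */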
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "predict.h"
#include "vec.h"
#include "hashtab.h"
#include "hash-set.h"
#include "machmode.h"
#include "hard-reg-set.h"
#include "input.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "hash-map.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"
#include "tree-ssa-address.h"
#include "builtins.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"
/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via a single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
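
/* Behavior sketch (illustrative, not from the original sources): for
   EXPR == &a[i] the ADDR_EXPR case returns &a converted to ptr_type_node;
   for EXPR == p + 4 (a POINTER_PLUS_EXPR) it recurses on the pointer
   operand and ends up returning p; for an expression of non-pointer type
   it returns NULL_TREE before the switch is reached.  */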
/* Return true if address expression with non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi, def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv peeled from another one as a biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, if it was not already reset by the caller, to avoid
     showing it in the dumps.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
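
/* Worked example (illustrative, not from the original sources): for
   TOP == n * 4 + n and BOT == n, the MULT_EXPR case yields 4
   (constant_multiple_of (n, n) is 1, times the constant 4), the second
   PLUS_EXPR operand yields 1, so *MUL is set to 5 and true is returned.
   For TOP == 12 and BOT == 8, the INTEGER_CST case computes a truncated
   quotient of 1 with remainder 4 != 0, so false is returned.  */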
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
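
/* Example (illustrative): for a 32-bit-aligned int reference advanced by a
   step of 2 bytes, tree_ctz (step) is 1 and (1U << 1) * BITS_PER_UNIT is 16,
   which is smaller than the required alignment of 32 bits, so the reference
   may become misaligned in later iterations and true is returned.  */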
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
2078 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2079 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2080 we are at the top-level of the processed address. */
2082 static tree
2083 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2084 HOST_WIDE_INT *offset)
2086 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2087 enum tree_code code;
2088 tree type, orig_type = TREE_TYPE (expr);
2089 HOST_WIDE_INT off0, off1, st;
2090 tree orig_expr = expr;
2092 STRIP_NOPS (expr);
2094 type = TREE_TYPE (expr);
2095 code = TREE_CODE (expr);
2096 *offset = 0;
2098 switch (code)
2100 case INTEGER_CST:
2101 if (!cst_and_fits_in_hwi (expr)
2102 || integer_zerop (expr))
2103 return orig_expr;
2105 *offset = int_cst_value (expr);
2106 return build_int_cst (orig_type, 0);
2108 case POINTER_PLUS_EXPR:
2109 case PLUS_EXPR:
2110 case MINUS_EXPR:
2111 op0 = TREE_OPERAND (expr, 0);
2112 op1 = TREE_OPERAND (expr, 1);
2114 op0 = strip_offset_1 (op0, false, false, &off0);
2115 op1 = strip_offset_1 (op1, false, false, &off1);
2117 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2118 if (op0 == TREE_OPERAND (expr, 0)
2119 && op1 == TREE_OPERAND (expr, 1))
2120 return orig_expr;
2122 if (integer_zerop (op1))
2123 expr = op0;
2124 else if (integer_zerop (op0))
2126 if (code == MINUS_EXPR)
2127 expr = fold_build1 (NEGATE_EXPR, type, op1);
2128 else
2129 expr = op1;
2131 else
2132 expr = fold_build2 (code, type, op0, op1);
2134 return fold_convert (orig_type, expr);
2136 case MULT_EXPR:
2137 op1 = TREE_OPERAND (expr, 1);
2138 if (!cst_and_fits_in_hwi (op1))
2139 return orig_expr;
2141 op0 = TREE_OPERAND (expr, 0);
2142 op0 = strip_offset_1 (op0, false, false, &off0);
2143 if (op0 == TREE_OPERAND (expr, 0))
2144 return orig_expr;
2146 *offset = off0 * int_cst_value (op1);
2147 if (integer_zerop (op0))
2148 expr = op0;
2149 else
2150 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2152 return fold_convert (orig_type, expr);
2154 case ARRAY_REF:
2155 case ARRAY_RANGE_REF:
2156 if (!inside_addr)
2157 return orig_expr;
2159 step = array_ref_element_size (expr);
2160 if (!cst_and_fits_in_hwi (step))
2161 break;
2163 st = int_cst_value (step);
2164 op1 = TREE_OPERAND (expr, 1);
2165 op1 = strip_offset_1 (op1, false, false, &off1);
2166 *offset = off1 * st;
2168 if (top_compref
2169 && integer_zerop (op1))
2171 /* Strip the component reference completely. */
2172 op0 = TREE_OPERAND (expr, 0);
2173 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2174 *offset += off0;
2175 return op0;
2177 break;
2179 case COMPONENT_REF:
2181 tree field;
2183 if (!inside_addr)
2184 return orig_expr;
2186 tmp = component_ref_field_offset (expr);
2187 field = TREE_OPERAND (expr, 1);
2188 if (top_compref
2189 && cst_and_fits_in_hwi (tmp)
2190 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2192 HOST_WIDE_INT boffset, abs_off;
2194 /* Strip the component reference completely. */
2195 op0 = TREE_OPERAND (expr, 0);
2196 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2197 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2198 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2199 if (boffset < 0)
2200 abs_off = -abs_off;
2202 *offset = off0 + int_cst_value (tmp) + abs_off;
2203 return op0;
2206 break;
2208 case ADDR_EXPR:
2209 op0 = TREE_OPERAND (expr, 0);
2210 op0 = strip_offset_1 (op0, true, true, &off0);
2211 *offset += off0;
2213 if (op0 == TREE_OPERAND (expr, 0))
2214 return orig_expr;
2216 expr = build_fold_addr_expr (op0);
2217 return fold_convert (orig_type, expr);
2219 case MEM_REF:
2220 /* ??? Offset operand? */
2221 inside_addr = false;
2222 break;
2224 default:
2225 return orig_expr;
2228 /* Default handling of expressions for which we want to recurse into
2229 the first operand. */
2230 op0 = TREE_OPERAND (expr, 0);
2231 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2232 *offset += off0;
2234 if (op0 == TREE_OPERAND (expr, 0)
2235 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2236 return orig_expr;
2238 expr = copy_node (expr);
2239 TREE_OPERAND (expr, 0) = op0;
2240 if (op1)
2241 TREE_OPERAND (expr, 1) = op1;
2243 /* Inside an address, we might strip the top-level component references,
2244 thus changing the type of the expression. The handling of ADDR_EXPR
2245 will fix that. */
2246 expr = fold_convert (orig_type, expr);
2248 return expr;
2251 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2253 static tree
2254 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2256 HOST_WIDE_INT off;
2257 tree core = strip_offset_1 (expr, false, false, &off);
2258 *offset = off;
2259 return core;
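/* An illustrative example (an addition, not part of the original source):
   for EXPR being the address &a[i + 4], where a is an array of 4-byte
   integers,

     strip_offset (&a[i + 4], &off)

   returns the stripped core &a[i] and stores 16 in *OFF (4 elements of
   4 bytes each).  The peeled constant can later be matched against the
   constant displacement range of the target's addressing modes.  */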
2262 /* Returns a variant of TYPE that can be used as a base for different uses.
2263 We return an unsigned type with the same precision, which avoids problems
2264 with overflow. */
2266 static tree
2267 generic_type_for (tree type)
2269 if (POINTER_TYPE_P (type))
2270 return unsigned_type_for (type);
2272 if (TYPE_UNSIGNED (type))
2273 return type;
2275 return unsigned_type_for (type);
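/* Illustrative examples (an addition, not part of the original source):
   generic_type_for (int) is unsigned int, generic_type_for (unsigned long)
   is unsigned long itself, and for a pointer type such as int * the result
   is the unsigned integer type of the same precision (e.g. a 64-bit
   unsigned type on an LP64 target).  Computing candidate ivs in unsigned
   arithmetic makes overflow wrap around instead of invoking undefined
   behavior.  */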
2278 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2279 the bitmap in which we should store them. */
2281 static struct ivopts_data *fd_ivopts_data;
2282 static tree
2283 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2285 bitmap *depends_on = (bitmap *) data;
2286 struct version_info *info;
2288 if (TREE_CODE (*expr_p) != SSA_NAME)
2289 return NULL_TREE;
2290 info = name_info (fd_ivopts_data, *expr_p);
2292 if (!info->inv_id || info->has_nonlin_use)
2293 return NULL_TREE;
2295 if (!*depends_on)
2296 *depends_on = BITMAP_ALLOC (NULL);
2297 bitmap_set_bit (*depends_on, info->inv_id);
2299 return NULL_TREE;
2302 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2303 position to POS. If USE is not NULL, the candidate is set as related to
2304 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2305 replacement of the final value of the iv by a direct computation. */
2307 static struct iv_cand *
2308 add_candidate_1 (struct ivopts_data *data,
2309 tree base, tree step, bool important, enum iv_position pos,
2310 struct iv_use *use, gimple incremented_at)
2312 unsigned i;
2313 struct iv_cand *cand = NULL;
2314 tree type, orig_type;
2316 /* For non-original variables, make sure their values are computed in a type
2317 that does not invoke undefined behavior on overflows (since in general,
2318 we cannot prove that these induction variables are non-wrapping). */
2319 if (pos != IP_ORIGINAL)
2321 orig_type = TREE_TYPE (base);
2322 type = generic_type_for (orig_type);
2323 if (type != orig_type)
2325 base = fold_convert (type, base);
2326 step = fold_convert (type, step);
2330 for (i = 0; i < n_iv_cands (data); i++)
2332 cand = iv_cand (data, i);
2334 if (cand->pos != pos)
2335 continue;
2337 if (cand->incremented_at != incremented_at
2338 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2339 && cand->ainc_use != use))
2340 continue;
2342 if (!cand->iv)
2344 if (!base && !step)
2345 break;
2347 continue;
2350 if (!base && !step)
2351 continue;
2353 if (operand_equal_p (base, cand->iv->base, 0)
2354 && operand_equal_p (step, cand->iv->step, 0)
2355 && (TYPE_PRECISION (TREE_TYPE (base))
2356 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2357 break;
2360 if (i == n_iv_cands (data))
2362 cand = XCNEW (struct iv_cand);
2363 cand->id = i;
2365 if (!base && !step)
2366 cand->iv = NULL;
2367 else
2368 cand->iv = alloc_iv (base, step);
2370 cand->pos = pos;
2371 if (pos != IP_ORIGINAL && cand->iv)
2373 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2374 cand->var_after = cand->var_before;
2376 cand->important = important;
2377 cand->incremented_at = incremented_at;
2378 data->iv_candidates.safe_push (cand);
2380 if (step
2381 && TREE_CODE (step) != INTEGER_CST)
2383 fd_ivopts_data = data;
2384 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2387 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2388 cand->ainc_use = use;
2389 else
2390 cand->ainc_use = NULL;
2392 if (dump_file && (dump_flags & TDF_DETAILS))
2393 dump_cand (dump_file, cand);
2396 if (important && !cand->important)
2398 cand->important = true;
2399 if (dump_file && (dump_flags & TDF_DETAILS))
2400 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2403 if (use)
2405 bitmap_set_bit (use->related_cands, i);
2406 if (dump_file && (dump_flags & TDF_DETAILS))
2407 fprintf (dump_file, "Candidate %d is related to use %d\n",
2408 cand->id, use->id);
2411 return cand;
2414 /* Returns true if incrementing the induction variable at the end of the LOOP
2415 is allowed.
2417 The purpose is to avoid splitting the latch edge with a biv increment, thus
2418 creating a jump, possibly confusing other optimization passes and leaving
2419 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2420 is not available (so we do not have a better alternative), or if the latch
2421 edge is already nonempty. */
2423 static bool
2424 allow_ip_end_pos_p (struct loop *loop)
2426 if (!ip_normal_pos (loop))
2427 return true;
2429 if (!empty_block_p (ip_end_pos (loop)))
2430 return true;
2432 return false;
2435 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2436 Important field is set to IMPORTANT. */
2438 static void
2439 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2440 bool important, struct iv_use *use)
2442 basic_block use_bb = gimple_bb (use->stmt);
2443 machine_mode mem_mode;
2444 unsigned HOST_WIDE_INT cstepi;
2446 /* If we insert the increment in any position other than the standard
2447 ones, we must ensure that it is incremented once per iteration.
2448 It must not be in an inner nested loop, or one side of an if
2449 statement. */
2450 if (use_bb->loop_father != data->current_loop
2451 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2452 || stmt_could_throw_p (use->stmt)
2453 || !cst_and_fits_in_hwi (step))
2454 return;
2456 cstepi = int_cst_value (step);
2458 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2459 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2460 || USE_STORE_PRE_INCREMENT (mem_mode))
2461 && GET_MODE_SIZE (mem_mode) == cstepi)
2462 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2463 || USE_STORE_PRE_DECREMENT (mem_mode))
2464 && GET_MODE_SIZE (mem_mode) == -cstepi))
2466 enum tree_code code = MINUS_EXPR;
2467 tree new_base;
2468 tree new_step = step;
2470 if (POINTER_TYPE_P (TREE_TYPE (base)))
2472 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2473 code = POINTER_PLUS_EXPR;
2475 else
2476 new_step = fold_convert (TREE_TYPE (base), new_step);
2477 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2478 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2479 use->stmt);
2481 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2482 || USE_STORE_POST_INCREMENT (mem_mode))
2483 && GET_MODE_SIZE (mem_mode) == cstepi)
2484 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2485 || USE_STORE_POST_DECREMENT (mem_mode))
2486 && GET_MODE_SIZE (mem_mode) == -cstepi))
2488 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2489 use->stmt);
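/* Illustrative example (an addition, not part of the original source):
   for a 4-byte SImode load *p and a candidate step of 4, a target with
   post-increment addressing gets an IP_AFTER_USE candidate with base
   BASE, so the memory access and the increment can later be combined
   into a single (mem (post_inc ...)).  For the pre-increment variants
   the base is first adjusted by -step, which is what the
   MINUS_EXPR/POINTER_PLUS_EXPR code above computes.  */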
2493 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2494 position to POS. If USE is not NULL, the candidate is set as related to
2495 it. The candidate computation is scheduled on all available positions. */
2497 static void
2498 add_candidate (struct ivopts_data *data,
2499 tree base, tree step, bool important, struct iv_use *use)
2501 if (ip_normal_pos (data->current_loop))
2502 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2503 if (ip_end_pos (data->current_loop)
2504 && allow_ip_end_pos_p (data->current_loop))
2505 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2507 if (use != NULL && use->type == USE_ADDRESS)
2508 add_autoinc_candidates (data, base, step, important, use);
2511 /* Adds standard iv candidates. */
2513 static void
2514 add_standard_iv_candidates (struct ivopts_data *data)
2516 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2518 /* The same for the "long" type if it is still fast enough. */
2519 if (TYPE_PRECISION
2520 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2521 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2522 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2523 build_int_cst (long_integer_type_node, 1), true, NULL);
2525 /* The same for the "long long" type if it is still fast enough. */
2526 if (TYPE_PRECISION
2527 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2528 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2529 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2530 build_int_cst (long_long_integer_type_node, 1), true, NULL);
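/* Illustrative example (an addition, not part of the original source):
   on an LP64 target this adds a {0, +, 1} candidate in int (32 bits) and
   another one in long (64 bits, which still fits in a word); the
   long long variant is skipped there because it is no wider than
   long.  */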
2534 /* Adds candidates based on the old induction variable IV. */
2536 static void
2537 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2539 gimple phi;
2540 tree def;
2541 struct iv_cand *cand;
2543 add_candidate (data, iv->base, iv->step, true, NULL);
2545 /* The same, but with initial value zero. */
2546 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2547 add_candidate (data, size_int (0), iv->step, true, NULL);
2548 else
2549 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2550 iv->step, true, NULL);
2552 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2553 if (gimple_code (phi) == GIMPLE_PHI)
2555 /* Additionally record the possibility of leaving the original iv
2556 untouched. */
2557 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2558 /* Don't add a candidate if it's defined by another PHI node, because
2559 then it's an affine iv appearing in the form of PEELED_CHREC. */
2560 phi = SSA_NAME_DEF_STMT (def);
2561 if (gimple_code (phi) != GIMPLE_PHI)
2563 cand = add_candidate_1 (data,
2564 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2565 SSA_NAME_DEF_STMT (def));
2566 cand->var_before = iv->ssa_name;
2567 cand->var_after = def;
2569 else
2570 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2574 /* Adds candidates based on the old induction variables. */
2576 static void
2577 add_old_ivs_candidates (struct ivopts_data *data)
2579 unsigned i;
2580 struct iv *iv;
2581 bitmap_iterator bi;
2583 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2585 iv = ver_info (data, i)->iv;
2586 if (iv && iv->biv_p && !integer_zerop (iv->step))
2587 add_old_iv_candidates (data, iv);
2591 /* Adds candidates based on the value of the induction variable IV and USE. */
2593 static void
2594 add_iv_value_candidates (struct ivopts_data *data,
2595 struct iv *iv, struct iv_use *use)
2597 unsigned HOST_WIDE_INT offset;
2598 tree base;
2599 tree basetype;
2601 add_candidate (data, iv->base, iv->step, false, use);
2603 /* The same, but with initial value zero. Make such a variable important,
2604 since it is generic enough that possibly many uses may be based
2605 on it. */
2606 basetype = TREE_TYPE (iv->base);
2607 if (POINTER_TYPE_P (basetype))
2608 basetype = sizetype;
2609 add_candidate (data, build_int_cst (basetype, 0),
2610 iv->step, true, use);
2612 /* Third, try removing the constant offset. Make sure to even
2613 add a candidate for &a[0] vs. (T *)&a. */
2614 base = strip_offset (iv->base, &offset);
2615 if (offset
2616 || base != iv->base)
2617 add_candidate (data, base, iv->step, false, use);
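/* Illustrative example (an addition, not part of the original source):
   for a use with iv {&a[4], +, 4} this adds the candidate itself, the
   important zero-based candidate {0, +, 4}, and the offset-stripped
   candidate {(T *)&a, +, 4}, letting the cost model pick whichever base
   folds best into the surrounding computations.  */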
2620 /* Adds candidates based on the uses. */
2622 static void
2623 add_derived_ivs_candidates (struct ivopts_data *data)
2625 unsigned i;
2627 for (i = 0; i < n_iv_uses (data); i++)
2629 struct iv_use *use = iv_use (data, i);
2631 if (!use)
2632 continue;
2634 switch (use->type)
2636 case USE_NONLINEAR_EXPR:
2637 case USE_COMPARE:
2638 case USE_ADDRESS:
2639 /* Just add the ivs based on the value of the iv used here. */
2640 add_iv_value_candidates (data, use->iv, use);
2641 break;
2643 default:
2644 gcc_unreachable ();
2649 /* Record important candidates and add them to related_cands bitmaps
2650 if needed. */
2652 static void
2653 record_important_candidates (struct ivopts_data *data)
2655 unsigned i;
2656 struct iv_use *use;
2658 for (i = 0; i < n_iv_cands (data); i++)
2660 struct iv_cand *cand = iv_cand (data, i);
2662 if (cand->important)
2663 bitmap_set_bit (data->important_candidates, i);
2666 data->consider_all_candidates = (n_iv_cands (data)
2667 <= CONSIDER_ALL_CANDIDATES_BOUND);
2669 if (data->consider_all_candidates)
2671 /* We will not need "related_cands" bitmaps in this case,
2672 so release them to decrease peak memory consumption. */
2673 for (i = 0; i < n_iv_uses (data); i++)
2675 use = iv_use (data, i);
2676 BITMAP_FREE (use->related_cands);
2679 else
2681 /* Add important candidates to the related_cands bitmaps. */
2682 for (i = 0; i < n_iv_uses (data); i++)
2683 bitmap_ior_into (iv_use (data, i)->related_cands,
2684 data->important_candidates);
2688 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2689 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2690 we allocate a simple list to every use. */
2692 static void
2693 alloc_use_cost_map (struct ivopts_data *data)
2695 unsigned i, size, s;
2697 for (i = 0; i < n_iv_uses (data); i++)
2699 struct iv_use *use = iv_use (data, i);
2701 if (data->consider_all_candidates)
2702 size = n_iv_cands (data);
2703 else
2705 s = bitmap_count_bits (use->related_cands);
2707 /* Round up to a power of two, so that computing the modulo is fast. */
2708 size = s ? (1 << ceil_log2 (s)) : 1;
2711 use->n_map_members = size;
2712 use->cost_map = XCNEWVEC (struct cost_pair, size);
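/* Illustrative example (an addition, not part of the original source):
   a use with 5 related candidates gets n_map_members == 8, the next
   power of two, so a starting slot can be derived from a candidate id
   with the cheap mask operation

     cand->id & (use->n_map_members - 1)

   instead of a division; see set_use_iv_cost below.  */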
2716 /* Returns description of computation cost of expression whose runtime
2717 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2719 static comp_cost
2720 new_cost (unsigned runtime, unsigned complexity)
2722 comp_cost cost;
2724 cost.cost = runtime;
2725 cost.complexity = complexity;
2727 return cost;
2730 /* Adds costs COST1 and COST2. */
2732 static comp_cost
2733 add_costs (comp_cost cost1, comp_cost cost2)
2735 cost1.cost += cost2.cost;
2736 cost1.complexity += cost2.complexity;
2738 return cost1;
2740 /* Subtracts costs COST1 and COST2. */
2742 static comp_cost
2743 sub_costs (comp_cost cost1, comp_cost cost2)
2745 cost1.cost -= cost2.cost;
2746 cost1.complexity -= cost2.complexity;
2748 return cost1;
2751 /* Returns a negative number if COST1 < COST2, a positive number if
2752 COST1 > COST2, and 0 if COST1 = COST2. */
2754 static int
2755 compare_costs (comp_cost cost1, comp_cost cost2)
2757 if (cost1.cost == cost2.cost)
2758 return cost1.complexity - cost2.complexity;
2760 return cost1.cost - cost2.cost;
2763 /* Returns true if COST is infinite. */
2765 static bool
2766 infinite_cost_p (comp_cost cost)
2768 return cost.cost == INFTY;
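/* Worked example (an addition, not part of the original source): costs
   compare first by the runtime cost and only then by complexity, so
   compare_costs on {4, 2} and {5, 0} is negative (the first is cheaper),
   while on {4, 2} and {4, 1} it is positive: complexity only breaks ties
   between equally fast computations, e.g. preferring an address with
   fewer parts.  */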
2771 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2772 depends on invariants DEPENDS_ON, that the value used in expressing it
2773 is VALUE, and, in case of iv elimination, that the comparison operator is COMP. */
2775 static void
2776 set_use_iv_cost (struct ivopts_data *data,
2777 struct iv_use *use, struct iv_cand *cand,
2778 comp_cost cost, bitmap depends_on, tree value,
2779 enum tree_code comp, int inv_expr_id)
2781 unsigned i, s;
2783 if (infinite_cost_p (cost))
2785 BITMAP_FREE (depends_on);
2786 return;
2789 if (data->consider_all_candidates)
2791 use->cost_map[cand->id].cand = cand;
2792 use->cost_map[cand->id].cost = cost;
2793 use->cost_map[cand->id].depends_on = depends_on;
2794 use->cost_map[cand->id].value = value;
2795 use->cost_map[cand->id].comp = comp;
2796 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2797 return;
2800 /* n_map_members is a power of two, so this computes modulo. */
2801 s = cand->id & (use->n_map_members - 1);
2802 for (i = s; i < use->n_map_members; i++)
2803 if (!use->cost_map[i].cand)
2804 goto found;
2805 for (i = 0; i < s; i++)
2806 if (!use->cost_map[i].cand)
2807 goto found;
2809 gcc_unreachable ();
2811 found:
2812 use->cost_map[i].cand = cand;
2813 use->cost_map[i].cost = cost;
2814 use->cost_map[i].depends_on = depends_on;
2815 use->cost_map[i].value = value;
2816 use->cost_map[i].comp = comp;
2817 use->cost_map[i].inv_expr_id = inv_expr_id;
2820 /* Gets cost of (USE, CANDIDATE) pair. */
2822 static struct cost_pair *
2823 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2824 struct iv_cand *cand)
2826 unsigned i, s;
2827 struct cost_pair *ret;
2829 if (!cand)
2830 return NULL;
2832 if (data->consider_all_candidates)
2834 ret = use->cost_map + cand->id;
2835 if (!ret->cand)
2836 return NULL;
2838 return ret;
2841 /* n_map_members is a power of two, so this computes modulo. */
2842 s = cand->id & (use->n_map_members - 1);
2843 for (i = s; i < use->n_map_members; i++)
2844 if (use->cost_map[i].cand == cand)
2845 return use->cost_map + i;
2846 else if (use->cost_map[i].cand == NULL)
2847 return NULL;
2848 for (i = 0; i < s; i++)
2849 if (use->cost_map[i].cand == cand)
2850 return use->cost_map + i;
2851 else if (use->cost_map[i].cand == NULL)
2852 return NULL;
2854 return NULL;
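/* Illustrative sketch of the probing above (an addition, not part of the
   original source): with n_map_members == 8, a candidate with id 11
   starts at slot 11 & 7 == 3 and walks slots 3 .. 7, then wraps to
   0 .. 2.  Entries are only ever inserted, never removed, so reaching a
   NULL cand proves the pair was never recorded; since set_use_iv_cost
   drops infinite costs up front, a missing entry simply means "infinite
   cost".  */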
2857 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2858 static rtx
2859 produce_memory_decl_rtl (tree obj, int *regno)
2861 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2862 machine_mode address_mode = targetm.addr_space.address_mode (as);
2863 rtx x;
2865 gcc_assert (obj);
2866 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2868 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2869 x = gen_rtx_SYMBOL_REF (address_mode, name);
2870 SET_SYMBOL_REF_DECL (x, obj);
2871 x = gen_rtx_MEM (DECL_MODE (obj), x);
2872 set_mem_addr_space (x, as);
2873 targetm.encode_section_info (obj, x, true);
2875 else
2877 x = gen_raw_REG (address_mode, (*regno)++);
2878 x = gen_rtx_MEM (DECL_MODE (obj), x);
2879 set_mem_addr_space (x, as);
2882 return x;
2885 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2886 walk_tree. DATA contains the actual fake register number. */
2888 static tree
2889 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2891 tree obj = NULL_TREE;
2892 rtx x = NULL_RTX;
2893 int *regno = (int *) data;
2895 switch (TREE_CODE (*expr_p))
2897 case ADDR_EXPR:
2898 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2899 handled_component_p (*expr_p);
2900 expr_p = &TREE_OPERAND (*expr_p, 0))
2901 continue;
2902 obj = *expr_p;
2903 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2904 x = produce_memory_decl_rtl (obj, regno);
2905 break;
2907 case SSA_NAME:
2908 *ws = 0;
2909 obj = SSA_NAME_VAR (*expr_p);
2910 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2911 if (!obj)
2912 return NULL_TREE;
2913 if (!DECL_RTL_SET_P (obj))
2914 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2915 break;
2917 case VAR_DECL:
2918 case PARM_DECL:
2919 case RESULT_DECL:
2920 *ws = 0;
2921 obj = *expr_p;
2923 if (DECL_RTL_SET_P (obj))
2924 break;
2926 if (DECL_MODE (obj) == BLKmode)
2927 x = produce_memory_decl_rtl (obj, regno);
2928 else
2929 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2931 break;
2933 default:
2934 break;
2937 if (x)
2939 decl_rtl_to_reset.safe_push (obj);
2940 SET_DECL_RTL (obj, x);
2943 return NULL_TREE;
2946 /* Determines cost of the computation of EXPR. */
2948 static unsigned
2949 computation_cost (tree expr, bool speed)
2951 rtx_insn *seq;
2952 rtx rslt;
2953 tree type = TREE_TYPE (expr);
2954 unsigned cost;
2955 /* Avoid using hard regs in ways which may be unsupported. */
2956 int regno = LAST_VIRTUAL_REGISTER + 1;
2957 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2958 enum node_frequency real_frequency = node->frequency;
2960 node->frequency = NODE_FREQUENCY_NORMAL;
2961 crtl->maybe_hot_insn_p = speed;
2962 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2963 start_sequence ();
2964 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2965 seq = get_insns ();
2966 end_sequence ();
2967 default_rtl_profile ();
2968 node->frequency = real_frequency;
2970 cost = seq_cost (seq, speed);
2971 if (MEM_P (rslt))
2972 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2973 TYPE_ADDR_SPACE (type), speed);
2974 else if (!REG_P (rslt))
2975 cost += set_src_cost (rslt, speed);
2977 return cost;
2980 /* Returns variable containing the value of candidate CAND at statement AT. */
2982 static tree
2983 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2985 if (stmt_after_increment (loop, cand, stmt))
2986 return cand->var_after;
2987 else
2988 return cand->var_before;
2991 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2992 same precision that is at least as wide as the precision of TYPE, stores
2993 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2994 type of A and B. */
2996 static tree
2997 determine_common_wider_type (tree *a, tree *b)
2999 tree wider_type = NULL;
3000 tree suba, subb;
3001 tree atype = TREE_TYPE (*a);
3003 if (CONVERT_EXPR_P (*a))
3005 suba = TREE_OPERAND (*a, 0);
3006 wider_type = TREE_TYPE (suba);
3007 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3008 return atype;
3010 else
3011 return atype;
3013 if (CONVERT_EXPR_P (*b))
3015 subb = TREE_OPERAND (*b, 0);
3016 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3017 return atype;
3019 else
3020 return atype;
3022 *a = suba;
3023 *b = subb;
3024 return wider_type;
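/* Illustrative example (an addition, not part of the original source):
   if *A is (int) la and *B is (int) lb, with la and lb of type long,
   then la is stored to *A, lb to *B, and long is returned; the caller
   can then fold la - lb in the wider type before truncating, which
   folds better than (int) la - (int) lb.  */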
3027 /* Determines the expression by which USE is expressed from induction variable
3028 CAND at statement AT in LOOP. The expression is stored in a decomposed
3029 form into AFF. Returns false if USE cannot be expressed using CAND. */
3031 static bool
3032 get_computation_aff (struct loop *loop,
3033 struct iv_use *use, struct iv_cand *cand, gimple at,
3034 struct aff_tree *aff)
3036 tree ubase = use->iv->base;
3037 tree ustep = use->iv->step;
3038 tree cbase = cand->iv->base;
3039 tree cstep = cand->iv->step, cstep_common;
3040 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3041 tree common_type, var;
3042 tree uutype;
3043 aff_tree cbase_aff, var_aff;
3044 widest_int rat;
3046 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3048 /* We do not have the precision to express the values of the use. */
3049 return false;
3052 var = var_at_stmt (loop, cand, at);
3053 uutype = unsigned_type_for (utype);
3055 /* If the conversion is not noop, perform it. */
3056 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3058 cstep = fold_convert (uutype, cstep);
3059 cbase = fold_convert (uutype, cbase);
3060 var = fold_convert (uutype, var);
3063 if (!constant_multiple_of (ustep, cstep, &rat))
3064 return false;
3066 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3067 type, we achieve better folding by computing their difference in this
3068 wider type, and casting the result to UUTYPE. We do not need to worry about
3069 overflows, as all the arithmetic will in the end be performed in UUTYPE
3070 anyway. */
3071 common_type = determine_common_wider_type (&ubase, &cbase);
3073 /* use = ubase - ratio * cbase + ratio * var. */
3074 tree_to_aff_combination (ubase, common_type, aff);
3075 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3076 tree_to_aff_combination (var, uutype, &var_aff);
3078 /* We need to shift the value if we are after the increment. */
3079 if (stmt_after_increment (loop, cand, at))
3081 aff_tree cstep_aff;
3083 if (common_type != uutype)
3084 cstep_common = fold_convert (common_type, cstep);
3085 else
3086 cstep_common = cstep;
3088 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3089 aff_combination_add (&cbase_aff, &cstep_aff);
3092 aff_combination_scale (&cbase_aff, -rat);
3093 aff_combination_add (aff, &cbase_aff);
3094 if (common_type != uutype)
3095 aff_combination_convert (aff, uutype);
3097 aff_combination_scale (&var_aff, rat);
3098 aff_combination_add (aff, &var_aff);
3100 return true;
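/* Worked example (an addition, not part of the original source): for a
   use with iv {ubase, +, 4} and a candidate with iv {cbase, +, 2},
   constant_multiple_of yields rat == 2 and the affine combination built
   above is

     use = ubase - 2 * cbase + 2 * var

   which holds in every iteration because both ivs advance in lock step;
   the stmt_after_increment correction shifts cbase by one candidate
   step when the use is evaluated after the increment.  */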
3103 /* Return the type of USE. */
3105 static tree
3106 get_use_type (struct iv_use *use)
3108 tree base_type = TREE_TYPE (use->iv->base);
3109 tree type;
3111 if (use->type == USE_ADDRESS)
3113 /* The base_type may be a void pointer. Create a pointer type based on
3114 the mem_ref instead. */
3115 type = build_pointer_type (TREE_TYPE (*use->op_p));
3116 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3117 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3119 else
3120 type = base_type;
3122 return type;
3125 /* Determines the expression by which USE is expressed from induction variable
3126 CAND at statement AT in LOOP. The computation is unshared. */
3128 static tree
3129 get_computation_at (struct loop *loop,
3130 struct iv_use *use, struct iv_cand *cand, gimple at)
3132 aff_tree aff;
3133 tree type = get_use_type (use);
3135 if (!get_computation_aff (loop, use, cand, at, &aff))
3136 return NULL_TREE;
3137 unshare_aff_combination (&aff);
3138 return fold_convert (type, aff_combination_to_tree (&aff));
3141 /* Determines the expression by which USE is expressed from induction variable
3142 CAND in LOOP. The computation is unshared. */
3144 static tree
3145 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3147 return get_computation_at (loop, use, cand, use->stmt);
3150 /* Adjust the cost COST for being in loop setup rather than loop body.
3151 If we're optimizing for space, the loop setup overhead is constant;
3152 if we're optimizing for speed, amortize it over the expected number of iterations. */
3153 static unsigned
3154 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3156 if (cost == INFTY)
3157 return cost;
3158 else if (optimize_loop_for_speed_p (data->current_loop))
3159 return cost / avg_loop_niter (data->current_loop);
3160 else
3161 return cost;
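/* Illustrative example (an addition, not part of the original source):
   a setup computation with cost 20 in a loop averaging 10 iterations is
   accounted as 2 when optimizing for speed, since the one-off cost is
   spread over the iterations; when optimizing for size it stays 20,
   because the instructions are emitted exactly once either way.  */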
3164 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3165 validity for a memory reference accessing memory of mode MODE in
3166 address space AS. */
3169 bool
3170 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3171 addr_space_t as)
3173 #define MAX_RATIO 128
3174 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3175 static vec<sbitmap> valid_mult_list;
3176 sbitmap valid_mult;
3178 if (data_index >= valid_mult_list.length ())
3179 valid_mult_list.safe_grow_cleared (data_index + 1);
3181 valid_mult = valid_mult_list[data_index];
3182 if (!valid_mult)
3184 machine_mode address_mode = targetm.addr_space.address_mode (as);
3185 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3186 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3187 rtx addr, scaled;
3188 HOST_WIDE_INT i;
3190 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3191 bitmap_clear (valid_mult);
3192 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3193 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3194 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3196 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3197 if (memory_address_addr_space_p (mode, addr, as)
3198 || memory_address_addr_space_p (mode, scaled, as))
3199 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3202 if (dump_file && (dump_flags & TDF_DETAILS))
3204 fprintf (dump_file, " allowed multipliers:");
3205 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3206 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3207 fprintf (dump_file, " %d", (int) i);
3208 fprintf (dump_file, "\n");
3209 fprintf (dump_file, "\n");
3212 valid_mult_list[data_index] = valid_mult;
3215 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3216 return false;
3218 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
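/* Illustrative example (an assumption about a particular target, not
   part of the original source): on x86 the probing loop typically finds
   the scales 1, 2, 4 and 8 valid, matching the SIB addressing byte, so
   a ratio of 4 is free inside an address while a ratio of 3 must be
   computed by a separate multiplication.  */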
3221 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3222 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3223 variable is omitted. Compute the cost for a memory reference that accesses
3224 a memory location of mode MEM_MODE in address space AS.
3226 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3227 size of MEM_MODE / RATIO) is available. To make this determination, we
3228 look at the size of the increment to be made, which is given in CSTEP.
3229 CSTEP may be zero if the step is unknown.
3230 STMT_AFTER_INC is true iff the statement we're looking at is after the
3231 increment of the original biv.
3233 TODO -- there must be some better way. This all is quite crude. */
3235 enum ainc_type
3237 AINC_PRE_INC, /* Pre increment. */
3238 AINC_PRE_DEC, /* Pre decrement. */
3239 AINC_POST_INC, /* Post increment. */
3240 AINC_POST_DEC, /* Post decrement. */
3241 AINC_NONE /* Also the number of auto increment types. */
3244 typedef struct address_cost_data_s
3246 HOST_WIDE_INT min_offset, max_offset;
3247 unsigned costs[2][2][2][2];
3248 unsigned ainc_costs[AINC_NONE];
3249 } *address_cost_data;
3252 static comp_cost
3253 get_address_cost (bool symbol_present, bool var_present,
3254 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3255 HOST_WIDE_INT cstep, machine_mode mem_mode,
3256 addr_space_t as, bool speed,
3257 bool stmt_after_inc, bool *may_autoinc)
3259 machine_mode address_mode = targetm.addr_space.address_mode (as);
3260 static vec<address_cost_data> address_cost_data_list;
3261 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3262 address_cost_data data;
3263 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3264 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3265 unsigned cost, acost, complexity;
3266 enum ainc_type autoinc_type;
3267 bool offset_p, ratio_p, autoinc;
3268 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3269 unsigned HOST_WIDE_INT mask;
3270 unsigned bits;
3272 if (data_index >= address_cost_data_list.length ())
3273 address_cost_data_list.safe_grow_cleared (data_index + 1);
3275 data = address_cost_data_list[data_index];
3276 if (!data)
3278 HOST_WIDE_INT i;
3279 HOST_WIDE_INT rat, off = 0;
3280 int old_cse_not_expected, width;
3281 unsigned sym_p, var_p, off_p, rat_p, add_c;
3282 rtx_insn *seq;
3283 rtx addr, base;
3284 rtx reg0, reg1;
3286 data = (address_cost_data) xcalloc (1, sizeof (*data));
3288 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3290 width = GET_MODE_BITSIZE (address_mode) - 1;
3291 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3292 width = HOST_BITS_PER_WIDE_INT - 1;
3293 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3295 for (i = width; i >= 0; i--)
3297 off = -((unsigned HOST_WIDE_INT) 1 << i);
3298 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3299 if (memory_address_addr_space_p (mem_mode, addr, as))
3300 break;
3302 data->min_offset = (i == -1? 0 : off);
3304 for (i = width; i >= 0; i--)
3306 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3307 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3308 if (memory_address_addr_space_p (mem_mode, addr, as))
3309 break;
3310 /* For some targets, like ARM Thumb-1, the offset should be naturally
3311 aligned. Try an aligned offset if address_mode is not QImode. */
3312 off = (address_mode == QImode)
3313 ? 0
3314 : ((unsigned HOST_WIDE_INT) 1 << i)
3315 - GET_MODE_SIZE (address_mode);
3316 if (off > 0)
3318 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3319 if (memory_address_addr_space_p (mem_mode, addr, as))
3320 break;
3323 if (i == -1)
3324 off = 0;
3325 data->max_offset = off;
3327 if (dump_file && (dump_flags & TDF_DETAILS))
3329 fprintf (dump_file, "get_address_cost:\n");
3330 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3331 GET_MODE_NAME (mem_mode),
3332 data->min_offset);
3333 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3334 GET_MODE_NAME (mem_mode),
3335 data->max_offset);
3338 rat = 1;
3339 for (i = 2; i <= MAX_RATIO; i++)
3340 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3342 rat = i;
3343 break;
3346 /* Compute the cost of various addressing modes. */
3347 acost = 0;
3348 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3349 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3351 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3352 || USE_STORE_PRE_DECREMENT (mem_mode))
3354 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3355 has_predec[mem_mode]
3356 = memory_address_addr_space_p (mem_mode, addr, as);
3358 if (has_predec[mem_mode])
3359 data->ainc_costs[AINC_PRE_DEC]
3360 = address_cost (addr, mem_mode, as, speed);
3362 if (USE_LOAD_POST_DECREMENT (mem_mode)
3363 || USE_STORE_POST_DECREMENT (mem_mode))
3365 addr = gen_rtx_POST_DEC (address_mode, reg0);
3366 has_postdec[mem_mode]
3367 = memory_address_addr_space_p (mem_mode, addr, as);
3369 if (has_postdec[mem_mode])
3370 data->ainc_costs[AINC_POST_DEC]
3371 = address_cost (addr, mem_mode, as, speed);
3373 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3374 || USE_STORE_PRE_INCREMENT (mem_mode))
3376 addr = gen_rtx_PRE_INC (address_mode, reg0);
3377 has_preinc[mem_mode]
3378 = memory_address_addr_space_p (mem_mode, addr, as);
3380 if (has_preinc[mem_mode])
3381 data->ainc_costs[AINC_PRE_INC]
3382 = address_cost (addr, mem_mode, as, speed);
3384 if (USE_LOAD_POST_INCREMENT (mem_mode)
3385 || USE_STORE_POST_INCREMENT (mem_mode))
3387 addr = gen_rtx_POST_INC (address_mode, reg0);
3388 has_postinc[mem_mode]
3389 = memory_address_addr_space_p (mem_mode, addr, as);
3391 if (has_postinc[mem_mode])
3392 data->ainc_costs[AINC_POST_INC]
3393 = address_cost (addr, mem_mode, as, speed);
3395 for (i = 0; i < 16; i++)
3397 sym_p = i & 1;
3398 var_p = (i >> 1) & 1;
3399 off_p = (i >> 2) & 1;
3400 rat_p = (i >> 3) & 1;
3402 addr = reg0;
3403 if (rat_p)
3404 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3405 gen_int_mode (rat, address_mode));
3407 if (var_p)
3408 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3410 if (sym_p)
3412 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3413 /* ??? We can run into trouble with some backends by presenting
3414 it with symbols which haven't been properly passed through
3415 targetm.encode_section_info. By setting the local bit, we
3416 enhance the probability of things working. */
3417 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3419 if (off_p)
3420 base = gen_rtx_fmt_e (CONST, address_mode,
3421 gen_rtx_fmt_ee
3422 (PLUS, address_mode, base,
3423 gen_int_mode (off, address_mode)));
3425 else if (off_p)
3426 base = gen_int_mode (off, address_mode);
3427 else
3428 base = NULL_RTX;
3430 if (base)
3431 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3433 start_sequence ();
3434 /* To avoid splitting addressing modes, pretend that no cse will
3435 follow. */
3436 old_cse_not_expected = cse_not_expected;
3437 cse_not_expected = true;
3438 addr = memory_address_addr_space (mem_mode, addr, as);
3439 cse_not_expected = old_cse_not_expected;
3440 seq = get_insns ();
3441 end_sequence ();
3443 acost = seq_cost (seq, speed);
3444 acost += address_cost (addr, mem_mode, as, speed);
3446 if (!acost)
3447 acost = 1;
3448 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3451 /* On some targets, it is quite expensive to load a symbol into a register,
3452 which makes addresses that contain symbols look much more expensive.
3453 However, the symbol will have to be loaded in any case before the
3454 loop (and quite likely we have it in a register already), so it does not
3455 make much sense to penalize them too heavily. Hence make some final
3456 tweaks for the SYMBOL_PRESENT modes:
3458 If VAR_PRESENT is false, and the mode obtained by changing the symbol to
3459 a var is cheaper, use this mode with a small penalty.
3460 If VAR_PRESENT is true, try whether the mode with
3461 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
3462 if this is the case, use it. */
3463 add_c = add_cost (speed, address_mode);
3464 for (i = 0; i < 8; i++)
3466 var_p = i & 1;
3467 off_p = (i >> 1) & 1;
3468 rat_p = (i >> 2) & 1;
3470 acost = data->costs[0][1][off_p][rat_p] + 1;
3471 if (var_p)
3472 acost += add_c;
3474 if (acost < data->costs[1][var_p][off_p][rat_p])
3475 data->costs[1][var_p][off_p][rat_p] = acost;
3478 if (dump_file && (dump_flags & TDF_DETAILS))
3480 fprintf (dump_file, "Address costs:\n");
3482 for (i = 0; i < 16; i++)
3484 sym_p = i & 1;
3485 var_p = (i >> 1) & 1;
3486 off_p = (i >> 2) & 1;
3487 rat_p = (i >> 3) & 1;
3489 fprintf (dump_file, " ");
3490 if (sym_p)
3491 fprintf (dump_file, "sym + ");
3492 if (var_p)
3493 fprintf (dump_file, "var + ");
3494 if (off_p)
3495 fprintf (dump_file, "cst + ");
3496 if (rat_p)
3497 fprintf (dump_file, "rat * ");
3499 acost = data->costs[sym_p][var_p][off_p][rat_p];
3500 fprintf (dump_file, "index costs %d\n", acost);
3502 if (has_predec[mem_mode] || has_postdec[mem_mode]
3503 || has_preinc[mem_mode] || has_postinc[mem_mode])
3504 fprintf (dump_file, " May include autoinc/dec\n");
3505 fprintf (dump_file, "\n");
3508 address_cost_data_list[data_index] = data;
3511 bits = GET_MODE_BITSIZE (address_mode);
3512 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3513 offset &= mask;
3514 if ((offset >> (bits - 1) & 1))
3515 offset |= ~mask;
3516 s_offset = offset;
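/* The masking above sign-extends OFFSET from the width of ADDRESS_MODE.
   For example (an addition, not part of the original source): with
   32-bit addresses, an incoming offset of 0xfffffff0 becomes the
   HOST_WIDE_INT value -16, so the min_offset/max_offset range check
   below sees the negative displacement it really is.  */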
3518 autoinc = false;
3519 autoinc_type = AINC_NONE;
3520 msize = GET_MODE_SIZE (mem_mode);
3521 autoinc_offset = offset;
3522 if (stmt_after_inc)
3523 autoinc_offset += ratio * cstep;
3524 if (symbol_present || var_present || ratio != 1)
3525 autoinc = false;
3526 else
3528 if (has_postinc[mem_mode] && autoinc_offset == 0
3529 && msize == cstep)
3530 autoinc_type = AINC_POST_INC;
3531 else if (has_postdec[mem_mode] && autoinc_offset == 0
3532 && msize == -cstep)
3533 autoinc_type = AINC_POST_DEC;
3534 else if (has_preinc[mem_mode] && autoinc_offset == msize
3535 && msize == cstep)
3536 autoinc_type = AINC_PRE_INC;
3537 else if (has_predec[mem_mode] && autoinc_offset == -msize
3538 && msize == -cstep)
3539 autoinc_type = AINC_PRE_DEC;
3541 if (autoinc_type != AINC_NONE)
3542 autoinc = true;
3545 cost = 0;
3546 offset_p = (s_offset != 0
3547 && data->min_offset <= s_offset
3548 && s_offset <= data->max_offset);
3549 ratio_p = (ratio != 1
3550 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3552 if (ratio != 1 && !ratio_p)
3553 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3555 if (s_offset && !offset_p && !symbol_present)
3556 cost += add_cost (speed, address_mode);
3558 if (may_autoinc)
3559 *may_autoinc = autoinc;
3560 if (autoinc)
3561 acost = data->ainc_costs[autoinc_type];
3562 else
3563 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3564 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3565 return new_cost (cost + acost, complexity);
3568 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3569 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3570 calculating the operands of EXPR. Returns true if successful, and returns
3571 the cost in COST. */
3573 static bool
3574 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3575 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3577 comp_cost res;
3578 tree op1 = TREE_OPERAND (expr, 1);
3579 tree cst = TREE_OPERAND (mult, 1);
3580 tree multop = TREE_OPERAND (mult, 0);
3581 int m = exact_log2 (int_cst_value (cst));
3582 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3583 int sa_cost;
3584 bool equal_p = false;
3586 if (!(m >= 0 && m < maxm))
3587 return false;
3589 if (operand_equal_p (op1, mult, 0))
3590 equal_p = true;
3592 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3593 ? shiftadd_cost (speed, mode, m)
3594 : (equal_p
3595 ? shiftsub1_cost (speed, mode, m)
3596 : shiftsub0_cost (speed, mode, m)));
3597 res = new_cost (sa_cost, 0);
3598 res = add_costs (res, equal_p ? cost0 : cost1);
3600 STRIP_NOPS (multop);
3601 if (!is_gimple_val (multop))
3602 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3604 *cost = res;
3605 return true;
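/* Worked example (an addition, not part of the original source): for
   EXPR == a + b * 4, MULT is b * 4 and m == exact_log2 (4) == 2, so the
   returned cost is that of a single shift-and-add (e.g. one lea on x86,
   or an add with a shifted operand on ARM) plus the cost of computing
   a, rather than a separate multiply followed by an add.  */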
3608 /* Estimates cost of forcing expression EXPR into a variable. */
3610 static comp_cost
3611 force_expr_to_var_cost (tree expr, bool speed)
3613 static bool costs_initialized = false;
3614 static unsigned integer_cost [2];
3615 static unsigned symbol_cost [2];
3616 static unsigned address_cost [2];
3617 tree op0, op1;
3618 comp_cost cost0, cost1, cost;
3619 machine_mode mode;
3621 if (!costs_initialized)
3623 tree type = build_pointer_type (integer_type_node);
3624 tree var, addr;
3625 rtx x;
3626 int i;
3628 var = create_tmp_var_raw (integer_type_node, "test_var");
3629 TREE_STATIC (var) = 1;
3630 x = produce_memory_decl_rtl (var, NULL);
3631 SET_DECL_RTL (var, x);
3633 addr = build1 (ADDR_EXPR, type, var);
3636 for (i = 0; i < 2; i++)
3638 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3639 2000), i);
3641 symbol_cost[i] = computation_cost (addr, i) + 1;
3643 address_cost[i]
3644 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3645 if (dump_file && (dump_flags & TDF_DETAILS))
3647 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3648 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3649 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3650 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3651 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3652 fprintf (dump_file, "\n");
3656 costs_initialized = true;
3659 STRIP_NOPS (expr);
3661 if (SSA_VAR_P (expr))
3662 return no_cost;
3664 if (is_gimple_min_invariant (expr))
3666 if (TREE_CODE (expr) == INTEGER_CST)
3667 return new_cost (integer_cost [speed], 0);
3669 if (TREE_CODE (expr) == ADDR_EXPR)
3671 tree obj = TREE_OPERAND (expr, 0);
3673 if (TREE_CODE (obj) == VAR_DECL
3674 || TREE_CODE (obj) == PARM_DECL
3675 || TREE_CODE (obj) == RESULT_DECL)
3676 return new_cost (symbol_cost [speed], 0);
3679 return new_cost (address_cost [speed], 0);
3682 switch (TREE_CODE (expr))
3684 case POINTER_PLUS_EXPR:
3685 case PLUS_EXPR:
3686 case MINUS_EXPR:
3687 case MULT_EXPR:
3688 op0 = TREE_OPERAND (expr, 0);
3689 op1 = TREE_OPERAND (expr, 1);
3690 STRIP_NOPS (op0);
3691 STRIP_NOPS (op1);
3692 break;
3694 CASE_CONVERT:
3695 case NEGATE_EXPR:
3696 op0 = TREE_OPERAND (expr, 0);
3697 STRIP_NOPS (op0);
3698 op1 = NULL_TREE;
3699 break;
3701 default:
3702 /* Just an arbitrary value, FIXME. */
3703 return new_cost (target_spill_cost[speed], 0);
3706 if (op0 == NULL_TREE
3707 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3708 cost0 = no_cost;
3709 else
3710 cost0 = force_expr_to_var_cost (op0, speed);
3712 if (op1 == NULL_TREE
3713 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3714 cost1 = no_cost;
3715 else
3716 cost1 = force_expr_to_var_cost (op1, speed);
3718 mode = TYPE_MODE (TREE_TYPE (expr));
3719 switch (TREE_CODE (expr))
3721 case POINTER_PLUS_EXPR:
3722 case PLUS_EXPR:
3723 case MINUS_EXPR:
3724 case NEGATE_EXPR:
3725 cost = new_cost (add_cost (speed, mode), 0);
3726 if (TREE_CODE (expr) != NEGATE_EXPR)
3728 tree mult = NULL_TREE;
3729 comp_cost sa_cost;
3730 if (TREE_CODE (op1) == MULT_EXPR)
3731 mult = op1;
3732 else if (TREE_CODE (op0) == MULT_EXPR)
3733 mult = op0;
3735 if (mult != NULL_TREE
3736 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3737 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3738 speed, &sa_cost))
3739 return sa_cost;
3741 break;
3743 CASE_CONVERT:
3745 tree inner_mode, outer_mode;
3746 outer_mode = TREE_TYPE (expr);
3747 inner_mode = TREE_TYPE (op0);
3748 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3749 TYPE_MODE (inner_mode), speed), 0);
3751 break;
3753 case MULT_EXPR:
3754 if (cst_and_fits_in_hwi (op0))
3755 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3756 mode, speed), 0);
3757 else if (cst_and_fits_in_hwi (op1))
3758 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3759 mode, speed), 0);
3760 else
3761 return new_cost (target_spill_cost [speed], 0);
3762 break;
3764 default:
3765 gcc_unreachable ();
3768 cost = add_costs (cost, cost0);
3769 cost = add_costs (cost, cost1);
3771 /* Bound the cost by target_spill_cost. The parts of complicated
3772 computations often are either loop invariant or at least can
3773 be shared between several iv uses, so letting this grow without
3774 limits would not give reasonable results. */
3775 if (cost.cost > (int) target_spill_cost [speed])
3776 cost.cost = target_spill_cost [speed];
3778 return cost;
3781 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3782 invariants the computation depends on. */
3784 static comp_cost
3785 force_var_cost (struct ivopts_data *data,
3786 tree expr, bitmap *depends_on)
3788 if (depends_on)
3790 fd_ivopts_data = data;
3791 walk_tree (&expr, find_depends, depends_on, NULL);
3794 return force_expr_to_var_cost (expr, data->speed);
3797 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3798 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3799 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3800 invariants the computation depends on. */
3802 static comp_cost
3803 split_address_cost (struct ivopts_data *data,
3804 tree addr, bool *symbol_present, bool *var_present,
3805 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3807 tree core;
3808 HOST_WIDE_INT bitsize;
3809 HOST_WIDE_INT bitpos;
3810 tree toffset;
3811 machine_mode mode;
3812 int unsignedp, volatilep;
3814 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3815 &unsignedp, &volatilep, false);
3817 if (toffset != 0
3818 || bitpos % BITS_PER_UNIT != 0
3819 || TREE_CODE (core) != VAR_DECL)
3821 *symbol_present = false;
3822 *var_present = true;
3823 fd_ivopts_data = data;
3824 walk_tree (&addr, find_depends, depends_on, NULL);
3825 return new_cost (target_spill_cost[data->speed], 0);
3828 *offset += bitpos / BITS_PER_UNIT;
3829 if (TREE_STATIC (core)
3830 || DECL_EXTERNAL (core))
3832 *symbol_present = true;
3833 *var_present = false;
3834 return no_cost;
3837 *symbol_present = false;
3838 *var_present = true;
3839 return no_cost;
3842 /* Estimates cost of expressing difference of addresses E1 - E2 as
3843 var + symbol + offset. The value of offset is added to OFFSET,
3844 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3845 part is missing. DEPENDS_ON is a set of the invariants the computation
3846 depends on. */
3848 static comp_cost
3849 ptr_difference_cost (struct ivopts_data *data,
3850 tree e1, tree e2, bool *symbol_present, bool *var_present,
3851 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3853 HOST_WIDE_INT diff = 0;
3854 aff_tree aff_e1, aff_e2;
3855 tree type;
3857 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3859 if (ptr_difference_const (e1, e2, &diff))
3861 *offset += diff;
3862 *symbol_present = false;
3863 *var_present = false;
3864 return no_cost;
3867 if (integer_zerop (e2))
3868 return split_address_cost (data, TREE_OPERAND (e1, 0),
3869 symbol_present, var_present, offset, depends_on);
3871 *symbol_present = false;
3872 *var_present = true;
3874 type = signed_type_for (TREE_TYPE (e1));
3875 tree_to_aff_combination (e1, type, &aff_e1);
3876 tree_to_aff_combination (e2, type, &aff_e2);
3877 aff_combination_scale (&aff_e2, -1);
3878 aff_combination_add (&aff_e1, &aff_e2);
3880 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3883 /* Estimates cost of expressing difference E1 - E2 as
3884 var + symbol + offset. The value of offset is added to OFFSET,
3885 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3886 part is missing. DEPENDS_ON is a set of the invariants the computation
3887 depends on. */
3889 static comp_cost
3890 difference_cost (struct ivopts_data *data,
3891 tree e1, tree e2, bool *symbol_present, bool *var_present,
3892 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3894 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3895 unsigned HOST_WIDE_INT off1, off2;
3896 aff_tree aff_e1, aff_e2;
3897 tree type;
3899 e1 = strip_offset (e1, &off1);
3900 e2 = strip_offset (e2, &off2);
3901 *offset += off1 - off2;
3903 STRIP_NOPS (e1);
3904 STRIP_NOPS (e2);
3906 if (TREE_CODE (e1) == ADDR_EXPR)
3907 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3908 offset, depends_on);
3909 *symbol_present = false;
3911 if (operand_equal_p (e1, e2, 0))
3913 *var_present = false;
3914 return no_cost;
3917 *var_present = true;
3919 if (integer_zerop (e2))
3920 return force_var_cost (data, e1, depends_on);
3922 if (integer_zerop (e1))
3924 comp_cost cost = force_var_cost (data, e2, depends_on);
3925 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3926 return cost;
3929 type = signed_type_for (TREE_TYPE (e1));
3930 tree_to_aff_combination (e1, type, &aff_e1);
3931 tree_to_aff_combination (e2, type, &aff_e2);
3932 aff_combination_scale (&aff_e2, -1);
3933 aff_combination_add (&aff_e1, &aff_e2);
3935 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
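/* Worked example (an addition, not part of the original source): for
   E1 == &a[6] and E2 == &a[2] with 4-byte elements, strip_offset above
   peels 24 and 8 into *OFFSET (net 16); the remaining cores are both
   &a, for which ptr_difference_cost finds a constant difference of
   zero, so the whole difference is free at run time.  force_var_cost is
   only consulted when a variable part remains.  */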
3938 /* Returns true if AFF1 and AFF2 are identical. */
3940 static bool
3941 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3943 unsigned i;
3945 if (aff1->n != aff2->n)
3946 return false;
3948 for (i = 0; i < aff1->n; i++)
3950 if (aff1->elts[i].coef != aff2->elts[i].coef)
3951 return false;
3953 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3954 return false;
3956 return true;
3959 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3961 static int
3962 get_expr_id (struct ivopts_data *data, tree expr)
3964 struct iv_inv_expr_ent ent;
3965 struct iv_inv_expr_ent **slot;
3967 ent.expr = expr;
3968 ent.hash = iterative_hash_expr (expr, 0);
3969 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3970 if (*slot)
3971 return (*slot)->id;
3973 *slot = XNEW (struct iv_inv_expr_ent);
3974 (*slot)->expr = expr;
3975 (*slot)->hash = ent.hash;
3976 (*slot)->id = data->inv_expr_id++;
3977 return (*slot)->id;
3980 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3981 requires a new compiler-generated temporary. Returns -1 otherwise.
3982 ADDRESS_P is a flag indicating whether the expression is for address
3983 computation. */
3985 static int
3986 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3987 tree cbase, HOST_WIDE_INT ratio,
3988 bool address_p)
3990 aff_tree ubase_aff, cbase_aff;
3991 tree expr, ub, cb;
3993 STRIP_NOPS (ubase);
3994 STRIP_NOPS (cbase);
3995 ub = ubase;
3996 cb = cbase;
3998 if ((TREE_CODE (ubase) == INTEGER_CST)
3999 && (TREE_CODE (cbase) == INTEGER_CST))
4000 return -1;
4002 /* Strips the constant part. */
4003 if (TREE_CODE (ubase) == PLUS_EXPR
4004 || TREE_CODE (ubase) == MINUS_EXPR
4005 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4007 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4008 ubase = TREE_OPERAND (ubase, 0);
4011 /* Strips the constant part. */
4012 if (TREE_CODE (cbase) == PLUS_EXPR
4013 || TREE_CODE (cbase) == MINUS_EXPR
4014 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4016 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4017 cbase = TREE_OPERAND (cbase, 0);
4020 if (address_p)
4022 if (((TREE_CODE (ubase) == SSA_NAME)
4023 || (TREE_CODE (ubase) == ADDR_EXPR
4024 && is_gimple_min_invariant (ubase)))
4025 && (TREE_CODE (cbase) == INTEGER_CST))
4026 return -1;
4028 if (((TREE_CODE (cbase) == SSA_NAME)
4029 || (TREE_CODE (cbase) == ADDR_EXPR
4030 && is_gimple_min_invariant (cbase)))
4031 && (TREE_CODE (ubase) == INTEGER_CST))
4032 return -1;
4035 if (ratio == 1)
4037 if (operand_equal_p (ubase, cbase, 0))
4038 return -1;
4040 if (TREE_CODE (ubase) == ADDR_EXPR
4041 && TREE_CODE (cbase) == ADDR_EXPR)
4043 tree usym, csym;
4045 usym = TREE_OPERAND (ubase, 0);
4046 csym = TREE_OPERAND (cbase, 0);
4047 if (TREE_CODE (usym) == ARRAY_REF)
4049 tree ind = TREE_OPERAND (usym, 1);
4050 if (TREE_CODE (ind) == INTEGER_CST
4051 && tree_fits_shwi_p (ind)
4052 && tree_to_shwi (ind) == 0)
4053 usym = TREE_OPERAND (usym, 0);
4055 if (TREE_CODE (csym) == ARRAY_REF)
4057 tree ind = TREE_OPERAND (csym, 1);
4058 if (TREE_CODE (ind) == INTEGER_CST
4059 && tree_fits_shwi_p (ind)
4060 && tree_to_shwi (ind) == 0)
4061 csym = TREE_OPERAND (csym, 0);
4063 if (operand_equal_p (usym, csym, 0))
4064 return -1;
4066 /* Now do a more complex comparison. */
4067 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4068 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4069 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4070 return -1;
4073 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4074 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4076 aff_combination_scale (&cbase_aff, -1 * ratio);
4077 aff_combination_add (&ubase_aff, &cbase_aff);
4078 expr = aff_combination_to_tree (&ubase_aff);
4079 return get_expr_id (data, expr);
4084 /* Determines the cost of the computation by which USE is expressed
4085 from induction variable CAND. If ADDRESS_P is true, we just need
4086 to create an address from it, otherwise we want to get it into a
4087 register. A set of invariants we depend on is stored in
4088 DEPENDS_ON. AT is the statement at which the value is computed.
4089 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4090 addressing is likely. */
4092 static comp_cost
4093 get_computation_cost_at (struct ivopts_data *data,
4094 struct iv_use *use, struct iv_cand *cand,
4095 bool address_p, bitmap *depends_on, gimple at,
4096 bool *can_autoinc,
4097 int *inv_expr_id)
4099 tree ubase = use->iv->base, ustep = use->iv->step;
4100 tree cbase, cstep;
4101 tree utype = TREE_TYPE (ubase), ctype;
4102 unsigned HOST_WIDE_INT cstepi, offset = 0;
4103 HOST_WIDE_INT ratio, aratio;
4104 bool var_present, symbol_present, stmt_is_after_inc;
4105 comp_cost cost;
4106 widest_int rat;
4107 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4108 machine_mode mem_mode = (address_p
4109 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4110 : VOIDmode);
4112 *depends_on = NULL;
4114 /* Only consider real candidates. */
4115 if (!cand->iv)
4116 return infinite_cost;
4118 cbase = cand->iv->base;
4119 cstep = cand->iv->step;
4120 ctype = TREE_TYPE (cbase);
4122 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4124 /* We do not have the precision to express the values of the use. */
4125 return infinite_cost;
4128 if (address_p
4129 || (use->iv->base_object
4130 && cand->iv->base_object
4131 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4132 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4134 /* Do not try to express the address of an object with a computation
4135 based on the address of a different object. This may cause problems
4136 in RTL-level alias analysis (which does not expect this to happen,
4137 as it is illegal in C), and it would be unlikely to be useful
4138 anyway. */
4139 if (use->iv->base_object
4140 && cand->iv->base_object
4141 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4142 return infinite_cost;
4145 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4147 /* TODO -- add direct handling of this case. */
4148 goto fallback;
4151 /* CSTEPI is removed from the offset in case the statement is after the
4152 increment. If the step is not constant, we use zero instead.
4153 This is a bit imprecise (there is the extra addition), but
4154 redundancy elimination is likely to transform the code so that
4155 it uses the value of the variable before the increment anyway,
4156 so it is not all that unrealistic. */
4157 if (cst_and_fits_in_hwi (cstep))
4158 cstepi = int_cst_value (cstep);
4159 else
4160 cstepi = 0;
4162 if (!constant_multiple_of (ustep, cstep, &rat))
4163 return infinite_cost;
4165 if (wi::fits_shwi_p (rat))
4166 ratio = rat.to_shwi ();
4167 else
4168 return infinite_cost;
4170 STRIP_NOPS (cbase);
4171 ctype = TREE_TYPE (cbase);
4173 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4175 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4176 or ratio == 1, it is better to handle this like
4178 ubase - ratio * cbase + ratio * var
4180 (this also holds in the case ratio == -1, TODO). */
4182 if (cst_and_fits_in_hwi (cbase))
4184 offset = - ratio * int_cst_value (cbase);
4185 cost = difference_cost (data,
4186 ubase, build_int_cst (utype, 0),
4187 &symbol_present, &var_present, &offset,
4188 depends_on);
4189 cost.cost /= avg_loop_niter (data->current_loop);
4191 else if (ratio == 1)
4193 tree real_cbase = cbase;
4195 /* Check to see if any adjustment is needed. */
4196 if (cstepi == 0 && stmt_is_after_inc)
4198 aff_tree real_cbase_aff;
4199 aff_tree cstep_aff;
4201 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4202 &real_cbase_aff);
4203 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4205 aff_combination_add (&real_cbase_aff, &cstep_aff);
4206 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4209 cost = difference_cost (data,
4210 ubase, real_cbase,
4211 &symbol_present, &var_present, &offset,
4212 depends_on);
4213 cost.cost /= avg_loop_niter (data->current_loop);
4215 else if (address_p
4216 && !POINTER_TYPE_P (ctype)
4217 && multiplier_allowed_in_address_p
4218 (ratio, mem_mode,
4219 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4221 cbase
4222 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4223 cost = difference_cost (data,
4224 ubase, cbase,
4225 &symbol_present, &var_present, &offset,
4226 depends_on);
4227 cost.cost /= avg_loop_niter (data->current_loop);
4229 else
4231 cost = force_var_cost (data, cbase, depends_on);
4232 cost = add_costs (cost,
4233 difference_cost (data,
4234 ubase, build_int_cst (utype, 0),
4235 &symbol_present, &var_present,
4236 &offset, depends_on));
4237 cost.cost /= avg_loop_niter (data->current_loop);
4238 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4241 if (inv_expr_id)
4243 *inv_expr_id =
4244 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4245 /* Clear depends on. */
4246 if (*inv_expr_id != -1 && depends_on && *depends_on)
4247 bitmap_clear (*depends_on);
4250 /* If we are after the increment, the value of the candidate is already
4251 advanced by one step, so compensate for that in the offset. */
4252 if (stmt_is_after_inc)
4253 offset -= ratio * cstepi;
4255 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4256 (symbol/var1/const parts may be omitted). If we are looking for an
4257 address, find the cost of addressing this. */
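/* Illustrative instance of this shape: an access a[i + 3] with 4-byte
   elements, expressed from a candidate counting i, decomposes into
   symbol == a, const == 12 and ratio == 4, var2 being the candidate
   and var1 absent.  */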
4258 if (address_p)
4259 return add_costs (cost,
4260 get_address_cost (symbol_present, var_present,
4261 offset, ratio, cstepi,
4262 mem_mode,
4263 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4264 speed, stmt_is_after_inc,
4265 can_autoinc));
4267 /* Otherwise estimate the costs for computing the expression. */
4268 if (!symbol_present && !var_present && !offset)
4270 if (ratio != 1)
4271 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4272 return cost;
4275 /* Symbol + offset should be compile-time computable, so consider that they
4276 are added once to the variable, if present. */
4277 if (var_present && (symbol_present || offset))
4278 cost.cost += adjust_setup_cost (data,
4279 add_cost (speed, TYPE_MODE (ctype)));
4281 /* Having an offset does not affect the runtime cost if it is added to the
4282 symbol, but it increases complexity. */
4283 if (offset)
4284 cost.complexity++;
4286 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4288 aratio = ratio > 0 ? ratio : -ratio;
4289 if (aratio != 1)
4290 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4291 return cost;
4293 fallback:
4294 if (can_autoinc)
4295 *can_autoinc = false;
4298 /* Just get the expression, expand it and measure the cost. */
4299 tree comp = get_computation_at (data->current_loop, use, cand, at);
4301 if (!comp)
4302 return infinite_cost;
4304 if (address_p)
4305 comp = build_simple_mem_ref (comp);
4307 return new_cost (computation_cost (comp, speed), 0);
4311 /* Determines the cost of the computation by which USE is expressed
4312 from induction variable CAND. If ADDRESS_P is true, we just need
4313 to create an address from it, otherwise we want to get it into a
4314 register. A set of invariants we depend on is stored in
4315 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4316 autoinc addressing is likely. */
4318 static comp_cost
4319 get_computation_cost (struct ivopts_data *data,
4320 struct iv_use *use, struct iv_cand *cand,
4321 bool address_p, bitmap *depends_on,
4322 bool *can_autoinc, int *inv_expr_id)
4324 return get_computation_cost_at (data,
4325 use, cand, address_p, depends_on, use->stmt,
4326 can_autoinc, inv_expr_id);
4329 /* Determines cost of basing replacement of USE on CAND in a generic
4330 expression. */
4332 static bool
4333 determine_use_iv_cost_generic (struct ivopts_data *data,
4334 struct iv_use *use, struct iv_cand *cand)
4336 bitmap depends_on;
4337 comp_cost cost;
4338 int inv_expr_id = -1;
4340 /* The simple case first -- if we need to express the value of the preserved
4341 original biv, the cost is 0. This also prevents us from counting the
4342 cost of the increment twice -- once at this use and once in the cost of
4343 the candidate. */
4344 if (cand->pos == IP_ORIGINAL
4345 && cand->incremented_at == use->stmt)
4347 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4348 ERROR_MARK, -1);
4349 return true;
4352 cost = get_computation_cost (data, use, cand, false, &depends_on,
4353 NULL, &inv_expr_id);
4355 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4356 inv_expr_id);
4358 return !infinite_cost_p (cost);
4361 /* Determines cost of basing replacement of USE on CAND in an address. */
4363 static bool
4364 determine_use_iv_cost_address (struct ivopts_data *data,
4365 struct iv_use *use, struct iv_cand *cand)
4367 bitmap depends_on;
4368 bool can_autoinc;
4369 int inv_expr_id = -1;
4370 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4371 &can_autoinc, &inv_expr_id);
4373 if (cand->ainc_use == use)
4375 if (can_autoinc)
4376 cost.cost -= cand->cost_step;
4377 /* If we generated the candidate solely for exploiting autoincrement
4378 opportunities, and it turns out it can't be used, set the cost to
4379 infinity to make sure we ignore it. */
4380 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4381 cost = infinite_cost;
4383 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4384 inv_expr_id);
4386 return !infinite_cost_p (cost);
4389 /* Computes value of candidate CAND at position AT in iteration NITER, and
4390 stores it to VAL. */
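/* Concretely, this computes VAL == base + niter * step, plus one more step
   when AT is after the increment, in an unsigned type so that any wrapping
   is well defined.  */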
4392 static void
4393 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4394 aff_tree *val)
4396 aff_tree step, delta, nit;
4397 struct iv *iv = cand->iv;
4398 tree type = TREE_TYPE (iv->base);
4399 tree steptype = unsigned_type_for (type);
4404 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4405 aff_combination_convert (&step, steptype);
4406 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4407 aff_combination_convert (&nit, steptype);
4408 aff_combination_mult (&nit, &step, &delta);
4409 if (stmt_after_increment (loop, cand, at))
4410 aff_combination_add (&delta, &step);
4412 tree_to_aff_combination (iv->base, type, val);
4413 if (!POINTER_TYPE_P (type))
4414 aff_combination_convert (val, steptype);
4415 aff_combination_add (val, &delta);
4418 /* Returns the period of induction variable IV. */
4420 static tree
4421 iv_period (struct iv *iv)
4423 tree step = iv->step, period, type;
4424 tree pow2div;
4426 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4428 type = unsigned_type_for (TREE_TYPE (step));
4429 /* The period of the iv is lcm (step, type_range)/step - 1,
4430 i.e., N*type_range/step - 1. Since the type range is a power
4431 of two, N == step >> num_of_ending_zeros_binary (step),
4432 so the final result is
4434 (type_range >> num_of_ending_zeros_binary (step)) - 1
4436 */
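/* Worked instance of the formula above: a 32-bit iv with step 8 has
   num_ending_zeros == 3, so its period is (2**32 >> 3) - 1 == 0x1fffffff;
   the iv can be incremented that many times before one of its values
   repeats.  */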
4437 pow2div = num_ending_zeros (step);
4439 period = build_low_bits_mask (type,
4440 (TYPE_PRECISION (type)
4441 - tree_to_uhwi (pow2div)));
4443 return period;
4446 /* Returns the comparison operator used when eliminating the iv USE. */
4448 static enum tree_code
4449 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4451 struct loop *loop = data->current_loop;
4452 basic_block ex_bb;
4453 edge exit;
4455 ex_bb = gimple_bb (use->stmt);
4456 exit = EDGE_SUCC (ex_bb, 0);
4457 if (flow_bb_inside_loop_p (loop, exit->dest))
4458 exit = EDGE_SUCC (ex_bb, 1);
4460 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4463 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4464 we only detect the situation where BASE = SOMETHING + OFFSET, with the
4465 calculation performed in a non-wrapping type.
4467 TODO: More generally, we could test for the situation that
4468 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4469 This would require knowing the sign of OFFSET. */
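/* Illustrative instance: when called from iv_elimination_compare_lt below
   with BASE == p_0, where p_0 = base + a, and with OFFSET expanding to a
   (a candidate with step one), the affine expansions cancel and
   BASE - OFFSET == base, which cannot overflow because the addition was
   performed in a non-wrapping type.  */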
4471 static bool
4472 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4474 enum tree_code code;
4475 tree e1, e2;
4476 aff_tree aff_e1, aff_e2, aff_offset;
4478 if (!nowrap_type_p (TREE_TYPE (base)))
4479 return false;
4481 base = expand_simple_operations (base);
4483 if (TREE_CODE (base) == SSA_NAME)
4485 gimple stmt = SSA_NAME_DEF_STMT (base);
4487 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4488 return false;
4490 code = gimple_assign_rhs_code (stmt);
4491 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4492 return false;
4494 e1 = gimple_assign_rhs1 (stmt);
4495 e2 = gimple_assign_rhs2 (stmt);
4497 else
4499 code = TREE_CODE (base);
4500 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4501 return false;
4502 e1 = TREE_OPERAND (base, 0);
4503 e2 = TREE_OPERAND (base, 1);
4506 /* Use affine expansion as a deeper inspection to prove the equality. */
4507 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4508 &aff_e2, &data->name_expansion_cache);
4509 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4510 &aff_offset, &data->name_expansion_cache);
4511 aff_combination_scale (&aff_offset, -1);
4512 switch (code)
4514 case PLUS_EXPR:
4515 aff_combination_add (&aff_e2, &aff_offset);
4516 if (aff_combination_zero_p (&aff_e2))
4517 return true;
4519 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4520 &aff_e1, &data->name_expansion_cache);
4521 aff_combination_add (&aff_e1, &aff_offset);
4522 return aff_combination_zero_p (&aff_e1);
4524 case POINTER_PLUS_EXPR:
4525 aff_combination_add (&aff_e2, &aff_offset);
4526 return aff_combination_zero_p (&aff_e2);
4528 default:
4529 return false;
4533 /* Tries to replace the loop exit by one formulated in terms of a LT_EXPR
4534 comparison with CAND. NITER describes the number of iterations of
4535 the loop. If successful, the comparison in COMP_P is altered accordingly.
4537 We aim to handle the following situation:
4539 sometype *base, *p;
4540 int a, b, i;
4542 i = a;
4543 p = p_0 = base + a;
4545 do
4546 {
4547 bla (*p);
4548 p++;
4549 i++;
4550 }
4551 while (i < b);
4553 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4554 We aim to optimize this to
4556 p = p_0 = base + a;
4557 do
4558 {
4559 bla (*p);
4560 p++;
4561 }
4562 while (p < p_0 - a + b);
4564 This preserves correctness, since the pointer arithmetic does not
4565 overflow. More precisely:
4567 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4568 overflow in computing it or the values of p.
4569 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4570 overflow. To prove this, we use the fact that p_0 = base + a. */
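/* A concrete instance (illustrative numbers): with a == 0 and b == 5 the
   body executes five times, niter is b - a - 1 == 4, and the new exit test
   compares p against p_0 - a + b == base + 5, one past the last address
   dereferenced.  */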
4572 static bool
4573 iv_elimination_compare_lt (struct ivopts_data *data,
4574 struct iv_cand *cand, enum tree_code *comp_p,
4575 struct tree_niter_desc *niter)
4577 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4578 struct aff_tree nit, tmpa, tmpb;
4579 enum tree_code comp;
4580 HOST_WIDE_INT step;
4582 /* We need to know that the candidate induction variable does not overflow.
4583 While more complex analysis may be used to prove this, for now just
4584 check that the variable appears in the original program and that it
4585 is computed in a type that guarantees no overflows. */
4586 cand_type = TREE_TYPE (cand->iv->base);
4587 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4588 return false;
4590 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4591 the calculation of the BOUND could overflow, making the comparison
4592 invalid. */
4593 if (!data->loop_single_exit_p)
4594 return false;
4596 /* We need to be able to decide whether the candidate is increasing or
4597 decreasing in order to choose the right comparison operator. */
4598 if (!cst_and_fits_in_hwi (cand->iv->step))
4599 return false;
4600 step = int_cst_value (cand->iv->step);
4602 /* Check that the number of iterations matches the expected pattern:
4603 a + 1 > b ? 0 : b - a - 1. */
4604 mbz = niter->may_be_zero;
4605 if (TREE_CODE (mbz) == GT_EXPR)
4607 /* Handle a + 1 > b. */
4608 tree op0 = TREE_OPERAND (mbz, 0);
4609 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4611 a = TREE_OPERAND (op0, 0);
4612 b = TREE_OPERAND (mbz, 1);
4614 else
4615 return false;
4617 else if (TREE_CODE (mbz) == LT_EXPR)
4619 tree op1 = TREE_OPERAND (mbz, 1);
4621 /* Handle b < a + 1. */
4622 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4624 a = TREE_OPERAND (op1, 0);
4625 b = TREE_OPERAND (mbz, 0);
4627 else
4628 return false;
4630 else
4631 return false;
4633 /* Expected number of iterations is B - A - 1. Check that it matches
4634 the actual number, i.e., that B - A - NITER = 1. */
4635 tree_to_aff_combination (niter->niter, nit_type, &nit);
4636 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4637 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4638 aff_combination_scale (&nit, -1);
4639 aff_combination_scale (&tmpa, -1);
4640 aff_combination_add (&tmpb, &tmpa);
4641 aff_combination_add (&tmpb, &nit);
4642 if (tmpb.n != 0 || tmpb.offset != 1)
4643 return false;
4645 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4646 overflow. */
4647 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4648 cand->iv->step,
4649 fold_convert (TREE_TYPE (cand->iv->step), a));
4650 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4651 return false;
4653 /* Determine the new comparison operator. */
4654 comp = step < 0 ? GT_EXPR : LT_EXPR;
4655 if (*comp_p == NE_EXPR)
4656 *comp_p = comp;
4657 else if (*comp_p == EQ_EXPR)
4658 *comp_p = invert_tree_comparison (comp, false);
4659 else
4660 gcc_unreachable ();
4662 return true;
4665 /* Check whether it is possible to express the condition in USE by a
4666 comparison of candidate CAND. If so, store the value compared against
4667 in BOUND, and the comparison operator in COMP. */
4669 static bool
4670 may_eliminate_iv (struct ivopts_data *data,
4671 struct iv_use *use, struct iv_cand *cand, tree *bound,
4672 enum tree_code *comp)
4674 basic_block ex_bb;
4675 edge exit;
4676 tree period;
4677 struct loop *loop = data->current_loop;
4678 aff_tree bnd;
4679 struct tree_niter_desc *desc = NULL;
4681 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4682 return false;
4684 /* For now this works only for exits that dominate the loop latch.
4685 TODO: extend to other conditions inside the loop body. */
4686 ex_bb = gimple_bb (use->stmt);
4687 if (use->stmt != last_stmt (ex_bb)
4688 || gimple_code (use->stmt) != GIMPLE_COND
4689 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4690 return false;
4692 exit = EDGE_SUCC (ex_bb, 0);
4693 if (flow_bb_inside_loop_p (loop, exit->dest))
4694 exit = EDGE_SUCC (ex_bb, 1);
4695 if (flow_bb_inside_loop_p (loop, exit->dest))
4696 return false;
4698 desc = niter_for_exit (data, exit);
4699 if (!desc)
4700 return false;
4702 /* Determine whether we can use the variable to test the exit condition.
4703 This is the case iff the period of the induction variable is greater
4704 than the number of iterations for which the exit condition is true. */
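/* For example, an 8-bit candidate with step 1 has period 255: it can only
   be used for an exit condition that is reached at most 255 times, since
   past the period the candidate wraps and the comparison against the bound
   would match again too early.  */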
4705 period = iv_period (cand->iv);
4707 /* If the number of iterations is constant, compare against it directly. */
4708 if (TREE_CODE (desc->niter) == INTEGER_CST)
4710 /* See cand_value_at. */
4711 if (stmt_after_increment (loop, cand, use->stmt))
4713 if (!tree_int_cst_lt (desc->niter, period))
4714 return false;
4716 else
4718 if (tree_int_cst_lt (period, desc->niter))
4719 return false;
4723 /* If not, and if this is the only possible exit of the loop, see whether
4724 we can get a conservative estimate on the number of iterations of the
4725 entire loop and compare against that instead. */
4726 else
4728 widest_int period_value, max_niter;
4730 max_niter = desc->max;
4731 if (stmt_after_increment (loop, cand, use->stmt))
4732 max_niter += 1;
4733 period_value = wi::to_widest (period);
4734 if (wi::gtu_p (max_niter, period_value))
4736 /* See if we can take advantage of inferred loop bound information. */
4737 if (data->loop_single_exit_p)
4739 if (!max_loop_iterations (loop, &max_niter))
4740 return false;
4741 /* The loop bound is already adjusted by adding 1. */
4742 if (wi::gtu_p (max_niter, period_value))
4743 return false;
4745 else
4746 return false;
4750 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4752 *bound = fold_convert (TREE_TYPE (cand->iv->base),
4753 aff_combination_to_tree (&bnd));
4754 *comp = iv_elimination_compare (data, use);
4756 /* It is unlikely that computing the number of iterations using division
4757 would be more profitable than keeping the original induction variable. */
4758 if (expression_expensive_p (*bound))
4759 return false;
4761 /* Sometimes, it is possible to handle the situation that the number of
4762 iterations may be zero unless additional assumptions hold, by using <
4763 instead of != in the exit condition.
4765 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4766 base the exit condition on it. However, that is often too
4767 expensive. */
4768 if (!integer_zerop (desc->may_be_zero))
4769 return iv_elimination_compare_lt (data, cand, comp, desc);
4771 return true;
4774 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4775 be copied, if it is used in the loop body and DATA->body_includes_call. */
4777 static int
4778 parm_decl_cost (struct ivopts_data *data, tree bound)
4780 tree sbound = bound;
4781 STRIP_NOPS (sbound);
4783 if (TREE_CODE (sbound) == SSA_NAME
4784 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4785 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4786 && data->body_includes_call)
4787 return COSTS_N_INSNS (1);
4789 return 0;
4792 /* Determines cost of basing replacement of USE on CAND in a condition. */
4794 static bool
4795 determine_use_iv_cost_condition (struct ivopts_data *data,
4796 struct iv_use *use, struct iv_cand *cand)
4798 tree bound = NULL_TREE;
4799 struct iv *cmp_iv;
4800 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4801 comp_cost elim_cost, express_cost, cost, bound_cost;
4802 bool ok;
4803 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4804 tree *control_var, *bound_cst;
4805 enum tree_code comp = ERROR_MARK;
4807 /* Only consider real candidates. */
4808 if (!cand->iv)
4810 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4811 ERROR_MARK, -1);
4812 return false;
4815 /* Try iv elimination. */
4816 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4818 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4819 if (elim_cost.cost == 0)
4820 elim_cost.cost = parm_decl_cost (data, bound);
4821 else if (TREE_CODE (bound) == INTEGER_CST)
4822 elim_cost.cost = 0;
4823 /* If we replace a loop condition 'i < n' with 'p < base + n',
4824 depends_on_elim will have 'base' and 'n' set, which implies
4825 that both 'base' and 'n' will be live during the loop. More likely,
4826 'base + n' will be loop invariant, resulting in only one live value
4827 during the loop. So in that case we clear depends_on_elim and set
4828 elim_inv_expr_id instead. */
4829 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4831 elim_inv_expr_id = get_expr_id (data, bound);
4832 bitmap_clear (depends_on_elim);
4834 /* The bound is a loop invariant, so it will only be computed
4835 once. */
4836 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4838 else
4839 elim_cost = infinite_cost;
4841 /* Try expressing the original giv. If it is compared with an invariant,
4842 note that we cannot get rid of it. */
4843 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4844 NULL, &cmp_iv);
4845 gcc_assert (ok);
4847 /* When the condition is a comparison of the candidate IV against
4848 zero, prefer this IV.
4850 TODO: The constant that we're subtracting from the cost should
4851 be target-dependent. This information should be added to the
4852 target costs for each backend. */
4853 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4854 && integer_zerop (*bound_cst)
4855 && (operand_equal_p (*control_var, cand->var_after, 0)
4856 || operand_equal_p (*control_var, cand->var_before, 0)))
4857 elim_cost.cost -= 1;
4859 express_cost = get_computation_cost (data, use, cand, false,
4860 &depends_on_express, NULL,
4861 &express_inv_expr_id);
4862 fd_ivopts_data = data;
4863 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4865 /* Count the cost of the original bound as well. */
4866 bound_cost = force_var_cost (data, *bound_cst, NULL);
4867 if (bound_cost.cost == 0)
4868 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4869 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4870 bound_cost.cost = 0;
4871 express_cost.cost += bound_cost.cost;
4873 /* Choose the better approach, preferring the eliminated IV. */
4874 if (compare_costs (elim_cost, express_cost) <= 0)
4876 cost = elim_cost;
4877 depends_on = depends_on_elim;
4878 depends_on_elim = NULL;
4879 inv_expr_id = elim_inv_expr_id;
4881 else
4883 cost = express_cost;
4884 depends_on = depends_on_express;
4885 depends_on_express = NULL;
4886 bound = NULL_TREE;
4887 comp = ERROR_MARK;
4888 inv_expr_id = express_inv_expr_id;
4891 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4893 if (depends_on_elim)
4894 BITMAP_FREE (depends_on_elim);
4895 if (depends_on_express)
4896 BITMAP_FREE (depends_on_express);
4898 return !infinite_cost_p (cost);
4901 /* Determines cost of basing replacement of USE on CAND. Returns false
4902 if USE cannot be based on CAND. */
4904 static bool
4905 determine_use_iv_cost (struct ivopts_data *data,
4906 struct iv_use *use, struct iv_cand *cand)
4908 switch (use->type)
4910 case USE_NONLINEAR_EXPR:
4911 return determine_use_iv_cost_generic (data, use, cand);
4913 case USE_ADDRESS:
4914 return determine_use_iv_cost_address (data, use, cand);
4916 case USE_COMPARE:
4917 return determine_use_iv_cost_condition (data, use, cand);
4919 default:
4920 gcc_unreachable ();
4924 /* Return true if get_computation_cost indicates that autoincrement is
4925 a possibility for the pair of USE and CAND, false otherwise. */
4927 static bool
4928 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4929 struct iv_cand *cand)
4931 bitmap depends_on;
4932 bool can_autoinc;
4933 comp_cost cost;
4935 if (use->type != USE_ADDRESS)
4936 return false;
4938 cost = get_computation_cost (data, use, cand, true, &depends_on,
4939 &can_autoinc, NULL);
4941 BITMAP_FREE (depends_on);
4943 return !infinite_cost_p (cost) && can_autoinc;
4946 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4947 use that allows autoincrement, and set their AINC_USE if possible. */
4949 static void
4950 set_autoinc_for_original_candidates (struct ivopts_data *data)
4952 unsigned i, j;
4954 for (i = 0; i < n_iv_cands (data); i++)
4956 struct iv_cand *cand = iv_cand (data, i);
4957 struct iv_use *closest_before = NULL;
4958 struct iv_use *closest_after = NULL;
4959 if (cand->pos != IP_ORIGINAL)
4960 continue;
4962 for (j = 0; j < n_iv_uses (data); j++)
4964 struct iv_use *use = iv_use (data, j);
4965 unsigned uid = gimple_uid (use->stmt);
4967 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4968 continue;
4970 if (uid < gimple_uid (cand->incremented_at)
4971 && (closest_before == NULL
4972 || uid > gimple_uid (closest_before->stmt)))
4973 closest_before = use;
4975 if (uid > gimple_uid (cand->incremented_at)
4976 && (closest_after == NULL
4977 || uid < gimple_uid (closest_after->stmt)))
4978 closest_after = use;
4981 if (closest_before != NULL
4982 && autoinc_possible_for_pair (data, closest_before, cand))
4983 cand->ainc_use = closest_before;
4984 else if (closest_after != NULL
4985 && autoinc_possible_for_pair (data, closest_after, cand))
4986 cand->ainc_use = closest_after;
4990 /* Finds the candidates for the induction variables. */
4992 static void
4993 find_iv_candidates (struct ivopts_data *data)
4995 /* Add commonly used ivs. */
4996 add_standard_iv_candidates (data);
4998 /* Add old induction variables. */
4999 add_old_ivs_candidates (data);
5001 /* Add induction variables derived from uses. */
5002 add_derived_ivs_candidates (data);
5004 set_autoinc_for_original_candidates (data);
5006 /* Record the important candidates. */
5007 record_important_candidates (data);
5010 /* Determines costs of basing the use of the iv on an iv candidate. */
5012 static void
5013 determine_use_iv_costs (struct ivopts_data *data)
5015 unsigned i, j;
5016 struct iv_use *use;
5017 struct iv_cand *cand;
5018 bitmap to_clear = BITMAP_ALLOC (NULL);
5020 alloc_use_cost_map (data);
5022 for (i = 0; i < n_iv_uses (data); i++)
5024 use = iv_use (data, i);
5026 if (data->consider_all_candidates)
5028 for (j = 0; j < n_iv_cands (data); j++)
5030 cand = iv_cand (data, j);
5031 determine_use_iv_cost (data, use, cand);
5034 else
5036 bitmap_iterator bi;
5038 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5040 cand = iv_cand (data, j);
5041 if (!determine_use_iv_cost (data, use, cand))
5042 bitmap_set_bit (to_clear, j);
5045 /* Remove the candidates for which the cost is infinite from
5046 the list of related candidates. */
5047 bitmap_and_compl_into (use->related_cands, to_clear);
5048 bitmap_clear (to_clear);
5052 BITMAP_FREE (to_clear);
5054 if (dump_file && (dump_flags & TDF_DETAILS))
5056 fprintf (dump_file, "Use-candidate costs:\n");
5058 for (i = 0; i < n_iv_uses (data); i++)
5060 use = iv_use (data, i);
5062 fprintf (dump_file, "Use %d:\n", i);
5063 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5064 for (j = 0; j < use->n_map_members; j++)
5066 if (!use->cost_map[j].cand
5067 || infinite_cost_p (use->cost_map[j].cost))
5068 continue;
5070 fprintf (dump_file, " %d\t%d\t%d\t",
5071 use->cost_map[j].cand->id,
5072 use->cost_map[j].cost.cost,
5073 use->cost_map[j].cost.complexity);
5074 if (use->cost_map[j].depends_on)
5075 bitmap_print (dump_file,
5076 use->cost_map[j].depends_on, "","");
5077 if (use->cost_map[j].inv_expr_id != -1)
5078 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5079 fprintf (dump_file, "\n");
5082 fprintf (dump_file, "\n");
5084 fprintf (dump_file, "\n");
5088 /* Determines cost of the candidate CAND. */
5090 static void
5091 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5093 comp_cost cost_base;
5094 unsigned cost, cost_step;
5095 tree base;
5097 if (!cand->iv)
5099 cand->cost = 0;
5100 return;
5103 /* There are two costs associated with the candidate -- its increment
5104 and its initialization. The second is almost negligible for any loop
5105 that rolls enough, so we only take it into account a little. */
5107 base = cand->iv->base;
5108 cost_base = force_var_cost (data, base, NULL);
5109 /* It is exceptional for the iv register to happen to be initialized with
5110 the proper value at no cost. In general, there will at least be a regcopy
5111 or a const set. */
5112 if (cost_base.cost == 0)
5113 cost_base.cost = COSTS_N_INSNS (1);
5114 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5116 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5118 /* Prefer the original ivs unless we may gain something by replacing them.
5119 The reason is to make debugging simpler; this is not relevant for
5120 artificial ivs created by other optimization passes. */
5121 if (cand->pos != IP_ORIGINAL
5122 || !SSA_NAME_VAR (cand->var_before)
5123 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5124 cost++;
5126 /* Prefer not to insert statements into the latch unless there are some
5127 already (so that we do not create unnecessary jumps). */
5128 if (cand->pos == IP_END
5129 && empty_block_p (ip_end_pos (data->current_loop)))
5130 cost++;
5132 cand->cost = cost;
5133 cand->cost_step = cost_step;
5136 /* Determines costs of computation of the candidates. */
5138 static void
5139 determine_iv_costs (struct ivopts_data *data)
5141 unsigned i;
5143 if (dump_file && (dump_flags & TDF_DETAILS))
5145 fprintf (dump_file, "Candidate costs:\n");
5146 fprintf (dump_file, " cand\tcost\n");
5149 for (i = 0; i < n_iv_cands (data); i++)
5151 struct iv_cand *cand = iv_cand (data, i);
5153 determine_iv_cost (data, cand);
5155 if (dump_file && (dump_flags & TDF_DETAILS))
5156 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5159 if (dump_file && (dump_flags & TDF_DETAILS))
5160 fprintf (dump_file, "\n");
5163 /* Calculates cost for having SIZE induction variables. */
5165 static unsigned
5166 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5168 /* We add size to the cost, so that we prefer eliminating ivs
5169 if possible. */
5170 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5171 data->body_includes_call);
5174 /* For each size of the induction variable set determine the penalty. */
5176 static void
5177 determine_set_costs (struct ivopts_data *data)
5179 unsigned j, n;
5180 gimple phi;
5181 gimple_stmt_iterator psi;
5182 tree op;
5183 struct loop *loop = data->current_loop;
5184 bitmap_iterator bi;
5186 if (dump_file && (dump_flags & TDF_DETAILS))
5188 fprintf (dump_file, "Global costs:\n");
5189 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5190 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5191 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5192 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5195 n = 0;
5196 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5198 phi = gsi_stmt (psi);
5199 op = PHI_RESULT (phi);
5201 if (virtual_operand_p (op))
5202 continue;
5204 if (get_iv (data, op))
5205 continue;
5207 n++;
5210 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5212 struct version_info *info = ver_info (data, j);
5214 if (info->inv_id && info->has_nonlin_use)
5215 n++;
5218 data->regs_used = n;
5219 if (dump_file && (dump_flags & TDF_DETAILS))
5220 fprintf (dump_file, " regs_used %d\n", n);
5222 if (dump_file && (dump_flags & TDF_DETAILS))
5224 fprintf (dump_file, " cost for size:\n");
5225 fprintf (dump_file, " ivs\tcost\n");
5226 for (j = 0; j <= 2 * target_avail_regs; j++)
5227 fprintf (dump_file, " %d\t%d\n", j,
5228 ivopts_global_cost_for_size (data, j));
5229 fprintf (dump_file, "\n");
5233 /* Returns true if A is a cheaper cost pair than B. */
5235 static bool
5236 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5238 int cmp;
5240 if (!a)
5241 return false;
5243 if (!b)
5244 return true;
5246 cmp = compare_costs (a->cost, b->cost);
5247 if (cmp < 0)
5248 return true;
5250 if (cmp > 0)
5251 return false;
5253 /* In case the costs are the same, prefer the cheaper candidate. */
5254 if (a->cand->cost < b->cand->cost)
5255 return true;
5257 return false;
5261 /* Returns the candidate by which USE is expressed in IVS. */
5263 static struct cost_pair *
5264 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5266 return ivs->cand_for_use[use->id];
5269 /* Computes the cost field of IVS structure. */
5271 static void
5272 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5274 comp_cost cost = ivs->cand_use_cost;
5276 cost.cost += ivs->cand_cost;
5278 cost.cost += ivopts_global_cost_for_size (data,
5279 ivs->n_regs + ivs->num_used_inv_expr);
5281 ivs->cost = cost;
5284 /* Remove invariants in set INVS from set IVS. */
5286 static void
5287 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5289 bitmap_iterator bi;
5290 unsigned iid;
5292 if (!invs)
5293 return;
5295 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5297 ivs->n_invariant_uses[iid]--;
5298 if (ivs->n_invariant_uses[iid] == 0)
5299 ivs->n_regs--;
5303 /* Set USE not to be expressed by any candidate in IVS. */
5305 static void
5306 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5307 struct iv_use *use)
5309 unsigned uid = use->id, cid;
5310 struct cost_pair *cp;
5312 cp = ivs->cand_for_use[uid];
5313 if (!cp)
5314 return;
5315 cid = cp->cand->id;
5317 ivs->bad_uses++;
5318 ivs->cand_for_use[uid] = NULL;
5319 ivs->n_cand_uses[cid]--;
5321 if (ivs->n_cand_uses[cid] == 0)
5323 bitmap_clear_bit (ivs->cands, cid);
5324 /* Do not count the pseudocandidates. */
5325 if (cp->cand->iv)
5326 ivs->n_regs--;
5327 ivs->n_cands--;
5328 ivs->cand_cost -= cp->cand->cost;
5330 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5333 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5335 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5337 if (cp->inv_expr_id != -1)
5339 ivs->used_inv_expr[cp->inv_expr_id]--;
5340 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5341 ivs->num_used_inv_expr--;
5343 iv_ca_recount_cost (data, ivs);
5346 /* Add invariants in set INVS to set IVS. */
5348 static void
5349 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5351 bitmap_iterator bi;
5352 unsigned iid;
5354 if (!invs)
5355 return;
5357 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5359 ivs->n_invariant_uses[iid]++;
5360 if (ivs->n_invariant_uses[iid] == 1)
5361 ivs->n_regs++;
5365 /* Set cost pair for USE in set IVS to CP. */
5367 static void
5368 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5369 struct iv_use *use, struct cost_pair *cp)
5371 unsigned uid = use->id, cid;
5373 if (ivs->cand_for_use[uid] == cp)
5374 return;
5376 if (ivs->cand_for_use[uid])
5377 iv_ca_set_no_cp (data, ivs, use);
5379 if (cp)
5381 cid = cp->cand->id;
5383 ivs->bad_uses--;
5384 ivs->cand_for_use[uid] = cp;
5385 ivs->n_cand_uses[cid]++;
5386 if (ivs->n_cand_uses[cid] == 1)
5388 bitmap_set_bit (ivs->cands, cid);
5389 /* Do not count the pseudocandidates. */
5390 if (cp->cand->iv)
5391 ivs->n_regs++;
5392 ivs->n_cands++;
5393 ivs->cand_cost += cp->cand->cost;
5395 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5398 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5399 iv_ca_set_add_invariants (ivs, cp->depends_on);
5401 if (cp->inv_expr_id != -1)
5403 ivs->used_inv_expr[cp->inv_expr_id]++;
5404 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5405 ivs->num_used_inv_expr++;
5407 iv_ca_recount_cost (data, ivs);
5411 /* Extend set IVS by expressing USE by some of the candidates in it
5412 if possible. Consider all important candidates if candidates in
5413 set IVS don't give any result. */
5415 static void
5416 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5417 struct iv_use *use)
5419 struct cost_pair *best_cp = NULL, *cp;
5420 bitmap_iterator bi;
5421 unsigned i;
5422 struct iv_cand *cand;
5424 gcc_assert (ivs->upto >= use->id);
5425 ivs->upto++;
5426 ivs->bad_uses++;
5428 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5430 cand = iv_cand (data, i);
5431 cp = get_use_iv_cost (data, use, cand);
5432 if (cheaper_cost_pair (cp, best_cp))
5433 best_cp = cp;
5436 if (best_cp == NULL)
5438 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5440 cand = iv_cand (data, i);
5441 cp = get_use_iv_cost (data, use, cand);
5442 if (cheaper_cost_pair (cp, best_cp))
5443 best_cp = cp;
5447 iv_ca_set_cp (data, ivs, use, best_cp);
5450 /* Get cost for assignment IVS. */
5452 static comp_cost
5453 iv_ca_cost (struct iv_ca *ivs)
5455 /* This was a conditional expression but it triggered a bug in
5456 Sun C 5.5. */
5457 if (ivs->bad_uses)
5458 return infinite_cost;
5459 else
5460 return ivs->cost;
5463 /* Returns true if all dependences of CP are among invariants in IVS. */
5465 static bool
5466 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5468 unsigned i;
5469 bitmap_iterator bi;
5471 if (!cp->depends_on)
5472 return true;
5474 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5476 if (ivs->n_invariant_uses[i] == 0)
5477 return false;
5480 return true;
5483 /* Creates a change of expressing USE by NEW_CP instead of OLD_CP and chains
5484 it before NEXT_CHANGE. */
5486 static struct iv_ca_delta *
5487 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5488 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5490 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5492 change->use = use;
5493 change->old_cp = old_cp;
5494 change->new_cp = new_cp;
5495 change->next_change = next_change;
5497 return change;
5500 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5501 are rewritten. */
5503 static struct iv_ca_delta *
5504 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5506 struct iv_ca_delta *last;
5508 if (!l2)
5509 return l1;
5511 if (!l1)
5512 return l2;
5514 for (last = l1; last->next_change; last = last->next_change)
5515 continue;
5516 last->next_change = l2;
5518 return l1;
5521 /* Reverse the list of changes DELTA, forming the inverse to it. */
5523 static struct iv_ca_delta *
5524 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5526 struct iv_ca_delta *act, *next, *prev = NULL;
5527 struct cost_pair *tmp;
5529 for (act = delta; act; act = next)
5531 next = act->next_change;
5532 act->next_change = prev;
5533 prev = act;
5535 tmp = act->old_cp;
5536 act->old_cp = act->new_cp;
5537 act->new_cp = tmp;
5540 return prev;
5543 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5544 reverted instead. */
5546 static void
5547 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5548 struct iv_ca_delta *delta, bool forward)
5550 struct cost_pair *from, *to;
5551 struct iv_ca_delta *act;
5553 if (!forward)
5554 delta = iv_ca_delta_reverse (delta);
5556 for (act = delta; act; act = act->next_change)
5558 from = act->old_cp;
5559 to = act->new_cp;
5560 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5561 iv_ca_set_cp (data, ivs, act->use, to);
5564 if (!forward)
5565 iv_ca_delta_reverse (delta);
5568 /* Returns true if CAND is used in IVS. */
5570 static bool
5571 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5573 return ivs->n_cand_uses[cand->id] > 0;
5576 /* Returns number of induction variable candidates in the set IVS. */
5578 static unsigned
5579 iv_ca_n_cands (struct iv_ca *ivs)
5581 return ivs->n_cands;
5584 /* Free the list of changes DELTA. */
5586 static void
5587 iv_ca_delta_free (struct iv_ca_delta **delta)
5589 struct iv_ca_delta *act, *next;
5591 for (act = *delta; act; act = next)
5593 next = act->next_change;
5594 free (act);
5597 *delta = NULL;
5600 /* Allocates a new iv candidate assignment. */
5602 static struct iv_ca *
5603 iv_ca_new (struct ivopts_data *data)
5605 struct iv_ca *nw = XNEW (struct iv_ca);
5607 nw->upto = 0;
5608 nw->bad_uses = 0;
5609 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5610 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5611 nw->cands = BITMAP_ALLOC (NULL);
5612 nw->n_cands = 0;
5613 nw->n_regs = 0;
5614 nw->cand_use_cost = no_cost;
5615 nw->cand_cost = 0;
5616 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5617 nw->cost = no_cost;
5618 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5619 nw->num_used_inv_expr = 0;
5621 return nw;
5624 /* Free memory occupied by the set IVS. */
5626 static void
5627 iv_ca_free (struct iv_ca **ivs)
5629 free ((*ivs)->cand_for_use);
5630 free ((*ivs)->n_cand_uses);
5631 BITMAP_FREE ((*ivs)->cands);
5632 free ((*ivs)->n_invariant_uses);
5633 free ((*ivs)->used_inv_expr);
5634 free (*ivs);
5635 *ivs = NULL;
5638 /* Dumps IVS to FILE. */
5640 static void
5641 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5643 const char *pref = " invariants ";
5644 unsigned i;
5645 comp_cost cost = iv_ca_cost (ivs);
5647 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5648 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5649 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5650 bitmap_print (file, ivs->cands, " candidates: ","\n");
5652 for (i = 0; i < ivs->upto; i++)
5654 struct iv_use *use = iv_use (data, i);
5655 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5656 if (cp)
5657 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5658 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5659 else
5660 fprintf (file, " use:%d --> ??\n", use->id);
5663 for (i = 1; i <= data->max_inv_id; i++)
5664 if (ivs->n_invariant_uses[i])
5666 fprintf (file, "%s%d", pref, i);
5667 pref = ", ";
5669 fprintf (file, "\n\n");
5672 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5673 new set, and store differences in DELTA. Number of induction variables
5674 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5675 the function will try to find a solution with a minimal number of iv candidates. */
5677 static comp_cost
5678 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5679 struct iv_cand *cand, struct iv_ca_delta **delta,
5680 unsigned *n_ivs, bool min_ncand)
5682 unsigned i;
5683 comp_cost cost;
5684 struct iv_use *use;
5685 struct cost_pair *old_cp, *new_cp;
5687 *delta = NULL;
5688 for (i = 0; i < ivs->upto; i++)
5690 use = iv_use (data, i);
5691 old_cp = iv_ca_cand_for_use (ivs, use);
5693 if (old_cp
5694 && old_cp->cand == cand)
5695 continue;
5697 new_cp = get_use_iv_cost (data, use, cand);
5698 if (!new_cp)
5699 continue;
5701 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5702 continue;
5704 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5705 continue;
5707 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5710 iv_ca_delta_commit (data, ivs, *delta, true);
5711 cost = iv_ca_cost (ivs);
5712 if (n_ivs)
5713 *n_ivs = iv_ca_n_cands (ivs);
5714 iv_ca_delta_commit (data, ivs, *delta, false);
5716 return cost;
5719 /* Try narrowing set IVS by removing CAND. Return the cost of
5720 the new set and store the differences in DELTA. START is
5721 the candidate with which we start narrowing. */
5723 static comp_cost
5724 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5725 struct iv_cand *cand, struct iv_cand *start,
5726 struct iv_ca_delta **delta)
5728 unsigned i, ci;
5729 struct iv_use *use;
5730 struct cost_pair *old_cp, *new_cp, *cp;
5731 bitmap_iterator bi;
5732 struct iv_cand *cnd;
5733 comp_cost cost, best_cost, acost;
5735 *delta = NULL;
5736 for (i = 0; i < n_iv_uses (data); i++)
5738 use = iv_use (data, i);
5740 old_cp = iv_ca_cand_for_use (ivs, use);
5741 if (old_cp->cand != cand)
5742 continue;
5744 best_cost = iv_ca_cost (ivs);
5745 /* Start narrowing with START. */
5746 new_cp = get_use_iv_cost (data, use, start);
5748 if (data->consider_all_candidates)
5750 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5752 if (ci == cand->id || (start && ci == start->id))
5753 continue;
5755 cnd = iv_cand (data, ci);
5757 cp = get_use_iv_cost (data, use, cnd);
5758 if (!cp)
5759 continue;
5761 iv_ca_set_cp (data, ivs, use, cp);
5762 acost = iv_ca_cost (ivs);
5764 if (compare_costs (acost, best_cost) < 0)
5766 best_cost = acost;
5767 new_cp = cp;
5771 else
5773 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5775 if (ci == cand->id || (start && ci == start->id))
5776 continue;
5778 cnd = iv_cand (data, ci);
5780 cp = get_use_iv_cost (data, use, cnd);
5781 if (!cp)
5782 continue;
5784 iv_ca_set_cp (data, ivs, use, cp);
5785 acost = iv_ca_cost (ivs);
5787 if (compare_costs (acost, best_cost) < 0)
5789 best_cost = acost;
5790 new_cp = cp;
5794 /* Restore the old cp for this use. */
5795 iv_ca_set_cp (data, ivs, use, old_cp);
5797 if (!new_cp)
5799 iv_ca_delta_free (delta);
5800 return infinite_cost;
5803 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5806 iv_ca_delta_commit (data, ivs, *delta, true);
5807 cost = iv_ca_cost (ivs);
5808 iv_ca_delta_commit (data, ivs, *delta, false);
5810 return cost;
5813 /* Try optimizing the set of candidates IVS by removing candidates other
5814 than EXCEPT_CAND from it. Return cost of the new set, and store the
5815 differences in DELTA. */
5817 static comp_cost
5818 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5819 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5821 bitmap_iterator bi;
5822 struct iv_ca_delta *act_delta, *best_delta;
5823 unsigned i;
5824 comp_cost best_cost, acost;
5825 struct iv_cand *cand;
5827 best_delta = NULL;
5828 best_cost = iv_ca_cost (ivs);
5830 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5832 cand = iv_cand (data, i);
5834 if (cand == except_cand)
5835 continue;
5837 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5839 if (compare_costs (acost, best_cost) < 0)
5841 best_cost = acost;
5842 iv_ca_delta_free (&best_delta);
5843 best_delta = act_delta;
5845 else
5846 iv_ca_delta_free (&act_delta);
5849 if (!best_delta)
5851 *delta = NULL;
5852 return best_cost;
5855 /* Recurse to possibly remove other unnecessary ivs. */
5856 iv_ca_delta_commit (data, ivs, best_delta, true);
5857 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5858 iv_ca_delta_commit (data, ivs, best_delta, false);
5859 *delta = iv_ca_delta_join (best_delta, *delta);
5860 return best_cost;
5863 /* Tries to extend the set IVS in the best possible way in order
5864 to express the USE. If ORIGINALP is true, prefer candidates from
5865 the original set of IVs, otherwise favor important candidates not
5866 based on any memory object. */
5868 static bool
5869 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5870 struct iv_use *use, bool originalp)
5872 comp_cost best_cost, act_cost;
5873 unsigned i;
5874 bitmap_iterator bi;
5875 struct iv_cand *cand;
5876 struct iv_ca_delta *best_delta = NULL, *act_delta;
5877 struct cost_pair *cp;
5879 iv_ca_add_use (data, ivs, use);
5880 best_cost = iv_ca_cost (ivs);
5881 cp = iv_ca_cand_for_use (ivs, use);
5882 if (cp)
5884 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5885 iv_ca_set_no_cp (data, ivs, use);
5888 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5889 first try important candidates not based on any memory object. Only if
5890 this fails, try the specific ones. Rationale -- in loops with many
5891 variables the best choice often is to use just one generic biv. If we
5892 added many ivs specific to the uses here, the optimization algorithm later
5893 would be likely to get stuck in a local minimum, thus causing us to create
5894 too many ivs. The approach from few ivs to more seems more likely to be
5895 successful -- starting from few ivs, replacing an expensive use by a
5896 specific iv should always be a win. */
5897 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5899 cand = iv_cand (data, i);
5901 if (originalp && cand->pos != IP_ORIGINAL)
5902 continue;
5904 if (!originalp && cand->iv->base_object != NULL_TREE)
5905 continue;
5907 if (iv_ca_cand_used_p (ivs, cand))
5908 continue;
5910 cp = get_use_iv_cost (data, use, cand);
5911 if (!cp)
5912 continue;
5914 iv_ca_set_cp (data, ivs, use, cp);
5915 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5916 true);
5917 iv_ca_set_no_cp (data, ivs, use);
5918 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5920 if (compare_costs (act_cost, best_cost) < 0)
5922 best_cost = act_cost;
5924 iv_ca_delta_free (&best_delta);
5925 best_delta = act_delta;
5927 else
5928 iv_ca_delta_free (&act_delta);
5931 if (infinite_cost_p (best_cost))
5933 for (i = 0; i < use->n_map_members; i++)
5935 cp = use->cost_map + i;
5936 cand = cp->cand;
5937 if (!cand)
5938 continue;
5940 /* Already tried this. */
5941 if (cand->important)
5943 if (originalp && cand->pos == IP_ORIGINAL)
5944 continue;
5945 if (!originalp && cand->iv->base_object == NULL_TREE)
5946 continue;
5949 if (iv_ca_cand_used_p (ivs, cand))
5950 continue;
5952 act_delta = NULL;
5953 iv_ca_set_cp (data, ivs, use, cp);
5954 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5955 iv_ca_set_no_cp (data, ivs, use);
5956 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5957 cp, act_delta);
5959 if (compare_costs (act_cost, best_cost) < 0)
5961 best_cost = act_cost;
5963 if (best_delta)
5964 iv_ca_delta_free (&best_delta);
5965 best_delta = act_delta;
5967 else
5968 iv_ca_delta_free (&act_delta);
5972 iv_ca_delta_commit (data, ivs, best_delta, true);
5973 iv_ca_delta_free (&best_delta);
5975 return !infinite_cost_p (best_cost);
5978 /* Finds an initial assignment of candidates to uses. */
5980 static struct iv_ca *
5981 get_initial_solution (struct ivopts_data *data, bool originalp)
5983 struct iv_ca *ivs = iv_ca_new (data);
5984 unsigned i;
5986 for (i = 0; i < n_iv_uses (data); i++)
5987 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5989 iv_ca_free (&ivs);
5990 return NULL;
5993 return ivs;
5996 /* Tries to improve the set of induction variables IVS. */
5998 static bool
5999 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
6001 unsigned i, n_ivs;
6002 comp_cost acost, best_cost = iv_ca_cost (ivs);
6003 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6004 struct iv_cand *cand;
6006 /* Try extending the set of induction variables by one. */
6007 for (i = 0; i < n_iv_cands (data); i++)
6009 cand = iv_cand (data, i);
6011 if (iv_ca_cand_used_p (ivs, cand))
6012 continue;
6014 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6015 if (!act_delta)
6016 continue;
6018 /* If we successfully added the candidate and the set is small enough,
6019 try optimizing it by removing other candidates. */
6020 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6022 iv_ca_delta_commit (data, ivs, act_delta, true);
6023 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6024 iv_ca_delta_commit (data, ivs, act_delta, false);
6025 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6028 if (compare_costs (acost, best_cost) < 0)
6030 best_cost = acost;
6031 iv_ca_delta_free (&best_delta);
6032 best_delta = act_delta;
6034 else
6035 iv_ca_delta_free (&act_delta);
6038 if (!best_delta)
6040 /* Try removing the candidates from the set instead. */
6041 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6043 /* Nothing more we can do. */
6044 if (!best_delta)
6045 return false;
6048 iv_ca_delta_commit (data, ivs, best_delta, true);
6049 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6050 iv_ca_delta_free (&best_delta);
6051 return true;
6054 /* Attempts to find the optimal set of induction variables. We use a simple
6055 greedy heuristic -- we try to replace at most one candidate in the selected
6056 solution and remove the unused ivs while this improves the cost. */
6058 static struct iv_ca *
6059 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6061 struct iv_ca *set;
6063 /* Get the initial solution. */
6064 set = get_initial_solution (data, originalp);
6065 if (!set)
6067 if (dump_file && (dump_flags & TDF_DETAILS))
6068 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6069 return NULL;
6072 if (dump_file && (dump_flags & TDF_DETAILS))
6074 fprintf (dump_file, "Initial set of candidates:\n");
6075 iv_ca_dump (data, dump_file, set);
6078 while (try_improve_iv_set (data, set))
6080 if (dump_file && (dump_flags & TDF_DETAILS))
6082 fprintf (dump_file, "Improved to:\n");
6083 iv_ca_dump (data, dump_file, set);
6087 return set;
6090 static struct iv_ca *
6091 find_optimal_iv_set (struct ivopts_data *data)
6093 unsigned i;
6094 struct iv_ca *set, *origset;
6095 struct iv_use *use;
6096 comp_cost cost, origcost;
6098 /* Determine the cost based on a strategy that starts with original IVs,
6099 and try again using a strategy that prefers candidates not based
6100 on any IVs. */
6101 origset = find_optimal_iv_set_1 (data, true);
6102 set = find_optimal_iv_set_1 (data, false);
6104 if (!origset && !set)
6105 return NULL;
6107 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6108 cost = set ? iv_ca_cost (set) : infinite_cost;
6110 if (dump_file && (dump_flags & TDF_DETAILS))
6112 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6113 origcost.cost, origcost.complexity);
6114 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6115 cost.cost, cost.complexity);
6118 /* Choose the one with the best cost. */
6119 if (compare_costs (origcost, cost) <= 0)
6121 if (set)
6122 iv_ca_free (&set);
6123 set = origset;
6125 else if (origset)
6126 iv_ca_free (&origset);
6128 for (i = 0; i < n_iv_uses (data); i++)
6130 use = iv_use (data, i);
6131 use->selected = iv_ca_cand_for_use (set, use)->cand;
6134 return set;
6137 /* Creates a new induction variable corresponding to CAND. */
6139 static void
6140 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6142 gimple_stmt_iterator incr_pos;
6143 tree base;
6144 bool after = false;
6146 if (!cand->iv)
6147 return;
6149 switch (cand->pos)
6151 case IP_NORMAL:
6152 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6153 break;
6155 case IP_END:
6156 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6157 after = true;
6158 break;
6160 case IP_AFTER_USE:
6161 after = true;
6162 /* fall through */
6163 case IP_BEFORE_USE:
6164 incr_pos = gsi_for_stmt (cand->incremented_at);
6165 break;
6167 case IP_ORIGINAL:
6168 /* Mark that the iv is preserved. */
6169 name_info (data, cand->var_before)->preserve_biv = true;
6170 name_info (data, cand->var_after)->preserve_biv = true;
6172 /* Rewrite the increment so that it uses var_before directly. */
6173 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6174 return;
6177 gimple_add_tmp_var (cand->var_before);
6179 base = unshare_expr (cand->iv->base);
6181 create_iv (base, unshare_expr (cand->iv->step),
6182 cand->var_before, data->current_loop,
6183 &incr_pos, after, &cand->var_before, &cand->var_after);
6186 /* Creates new induction variables described in SET. */
6188 static void
6189 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6191 unsigned i;
6192 struct iv_cand *cand;
6193 bitmap_iterator bi;
6195 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6197 cand = iv_cand (data, i);
6198 create_new_iv (data, cand);
6201 if (dump_file && (dump_flags & TDF_DETAILS))
6203 fprintf (dump_file, "\nSelected IV set: \n");
6204 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6206 cand = iv_cand (data, i);
6207 dump_cand (dump_file, cand);
6209 fprintf (dump_file, "\n");
6213 /* Rewrites USE (definition of iv used in a nonlinear expression)
6214 using candidate CAND. */
6216 static void
6217 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6218 struct iv_use *use, struct iv_cand *cand)
6220 tree comp;
6221 tree op, tgt;
6222 gimple ass;
6223 gimple_stmt_iterator bsi;
6225 /* An important special case -- if we are asked to express the value of
6226 the original iv by itself, just exit; there is no need to
6227 introduce a new computation (that might also need casting the
6228 variable to unsigned and back). */
6229 if (cand->pos == IP_ORIGINAL
6230 && cand->incremented_at == use->stmt)
6232 enum tree_code stmt_code;
6234 gcc_assert (is_gimple_assign (use->stmt));
6235 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6237 /* Check whether we may leave the computation unchanged.
6238 This is the case only if it does not rely on other
6239 computations in the loop -- otherwise, the computation
6240 we rely upon may be removed in remove_unused_ivs,
6241 thus leading to an ICE. */
6242 stmt_code = gimple_assign_rhs_code (use->stmt);
6243 if (stmt_code == PLUS_EXPR
6244 || stmt_code == MINUS_EXPR
6245 || stmt_code == POINTER_PLUS_EXPR)
6247 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6248 op = gimple_assign_rhs2 (use->stmt);
6249 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6250 op = gimple_assign_rhs1 (use->stmt);
6251 else
6252 op = NULL_TREE;
6254 else
6255 op = NULL_TREE;
6257 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6258 return;
6261 comp = get_computation (data->current_loop, use, cand);
6262 gcc_assert (comp != NULL_TREE);
6264 switch (gimple_code (use->stmt))
6266 case GIMPLE_PHI:
6267 tgt = PHI_RESULT (use->stmt);
6269 /* If we should keep the biv, do not replace it. */
6270 if (name_info (data, tgt)->preserve_biv)
6271 return;
6273 bsi = gsi_after_labels (gimple_bb (use->stmt));
6274 break;
6276 case GIMPLE_ASSIGN:
6277 tgt = gimple_assign_lhs (use->stmt);
6278 bsi = gsi_for_stmt (use->stmt);
6279 break;
6281 default:
6282 gcc_unreachable ();
6285 if (!valid_gimple_rhs_p (comp)
6286 || (gimple_code (use->stmt) != GIMPLE_PHI
6287 /* We can't allow re-allocating the stmt as it might be pointed
6288 to still. */
6289 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6290 >= gimple_num_ops (gsi_stmt (bsi)))))
6292 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6293 true, GSI_SAME_STMT);
6294 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6296 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6297 /* As this isn't a plain copy we have to reset alignment
6298 information. */
6299 if (SSA_NAME_PTR_INFO (comp))
6300 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6304 if (gimple_code (use->stmt) == GIMPLE_PHI)
6306 ass = gimple_build_assign (tgt, comp);
6307 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6309 bsi = gsi_for_stmt (use->stmt);
6310 remove_phi_node (&bsi, false);
6312 else
6314 gimple_assign_set_rhs_from_tree (&bsi, comp);
6315 use->stmt = gsi_stmt (bsi);
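/* For illustration only (expository, not used by the pass): if the
   nonlinear use is the assignment

     a_5 = i_2 * 4;

   and the selected candidate ivtmp_3 has base (i_0 * 4) and step 4,
   get_computation may express the use as a cast such as (int) ivtmp_3,
   and the statement is rewritten in place to

     a_5 = (int) ivtmp_3;

   A use appearing as a PHI result is instead replaced by a fresh
   assignment inserted after the labels of the PHI's block, and the PHI
   node itself is removed.  (The SSA names and the exact form of the
   computed expression here are hypothetical.)  */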
/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref
   uses the iv value before the update, so the reordering transformation
   requires adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;


   Directly propagating t over to (1) would introduce an overlapping live
   range and thus increase register pressure.  This peephole transforms
   the code into:


   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;

   The constant offset shrinks by one stride (16 - 8 = 8) because the
   reordered load now indexes with iv2 = iv1 + 1, so the address computed
   is unchanged.  */

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statements for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}
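/* For illustration only (expository): the checks above mean the
   transformation fires only when, ignoring debug statements, the end of
   the block looks exactly like

     t_1 = MEM[...];           <- use->stmt
     ivtmp_2 = ivtmp_1 + 1;    <- iv_update
     if (t_1 < val_3)          <- last statement, GIMPLE_COND

   and the function bails out for any other shape.  Safety follows from
   the address identity iv1 * stride + offset
   == (iv1 + 1) * stride + (offset - stride).  (The names are
   hypothetical.)  */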
/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
			reference_alias_ptr_type (*use->op_p),
			iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
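/* For illustration only (expository): an address use such as
   *(p_1 + ivtmp_2) may be rewritten into a TARGET_MEM_REF like

     MEM[base: p_1, index: ivtmp_2]

   or, with a scaled index,

     MEM[symbol: a, index: ivtmp_2, step: 4]

   whichever form create_mem_ref finds valid and cheapest for the
   target's addressing modes.  The base_hint keeps an object-based
   candidate in the base slot so that alias and points-to information is
   not lost.  (The SSA names here are hypothetical.)  */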
/* Rewrites USE (a condition in which one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Replacing exit test: ");
	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
	}
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
				     true, GSI_SAME_STMT);
}
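/* For illustration only (expository): given

     for (i = 0; i < 100; i++)
       a[i] = 0;

   with a selected pointer candidate p_1 starting at &a[0], iv
   elimination computes the bound &a[100] once on the preheader edge and
   turns the exit test into

     if (p_1 != &a[100])

   so the counter i becomes dead and can later be removed by
   remove_unused_ivs.  */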
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}
/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}
/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	{
	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

	  tree def = info->iv->ssa_name;

	  if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
	    {
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      gimple stmt;
	      int count = 0;

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  /* We just want to determine whether to do nothing
		     (count == 0), to substitute the computed
		     expression into a single use of the SSA DEF by
		     itself (count == 1), or to use a debug temp
		     because the SSA DEF is used multiple times or as
		     part of a larger expression (count > 1).  */
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      if (!count)
		continue;

	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;

	      /* Pick the candidate from which DEF is cheapest to
		 recompute, scanning the selected candidates of at most
		 the first 64 uses.  */
	      for (i = 0; i < n_iv_uses (data) && i < 64; i++)
		{
		  cand = iv_use (data, i)->selected;
		  if (cand == best_cand)
		    continue;
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
		    ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		    == TYPE_MODE (TREE_TYPE (info->iv->base))
		    ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		    ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      best_cand = cand;
		      best_pref = cand_pref;
		    }
		}

	      if (!best_cand)
		continue;

	      tree comp = get_computation_at (data->current_loop,
					      &dummy_use, best_cand,
					      SSA_NAME_DEF_STMT (def));
	      if (!comp)
		continue;

	      if (count > 1)
		{
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
		  else
		    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
		  gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
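/* For illustration only (expository): if the released iv i_1 was still
   referenced from debug binds, the code above either substitutes the
   recomputed expression directly (a single self-referencing bind,
   count == 1) or introduces a debug temporary for multiple or composite
   references (count > 1), e.g.

     # DEBUG D#1 => (int) ivtmp_3
     # DEBUG i => D#1

   so that the debugger can still display i after i_1 itself is gone.
   (The names and the recomputed expression are hypothetical.)  */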
/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for hash_map::traverse.  */

bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
				     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();
  data->inv_expr_id = 0;
}
/* Finalizes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
}
/* Returns true if the loop body BODY, consisting of NUM_NODES blocks,
   includes any function calls other than inexpensive builtins.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
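/* For illustration only (expository): on a loop such as

     for (i = 0; i < n; i++)
       sum += a[i];

   the pipeline above -- find ivs, collect uses and candidates, pick the
   cheapest set, rewrite, clean up -- may produce code equivalent to

     for (p = a, end = a + n; p != end; p++)
       sum += *p;

   combining strength reduction (no multiply in the address computation),
   iv coalescing (one iv instead of a counter and an address) and iv
   elimination (the exit test no longer needs i).  The exact outcome
   depends on the target cost model.  */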
/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}