Fix bootstrap/PR63632
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 2) Candidates for the induction variables are found. This includes
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
37 uses" above
 39 3) The optimal (w.r.t. a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
41 of three parts:
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
 45 the time spent on modifying the induction variable's value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
56 4) The trees are transformed to use the new variables, the dead code is
57 removed.
 59 All of this is done loop by loop. Doing it globally is theoretically
 60 possible; it might give better performance and enable us
 61 to decide costs more precisely, but getting all the interactions right
 62 would be complicated. */
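/* Editorial illustration (a sketch, not part of the original sources; the
   function and variable names below are made up for the example).  For a
   loop such as

	void
	clear_array (int *a, int n)
	{
	  int i;

	  for (i = 0; i < n; i++)
	    a[i] = 0;
	}

   the address &a[i] is an interesting address use based on the induction
   variable `i'.  Strength reduction may select a pointer candidate instead,
   so that the loop is rewritten roughly as

	void
	clear_array (int *a, int n)
	{
	  int *p, *end = a + n;

	  for (p = a; p != end; p++)
	    *p = 0;
	}

   with the choice among such candidates driven by the cost model described
   above.  */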
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "tm.h"
68 #include "tree.h"
69 #include "stor-layout.h"
70 #include "tm_p.h"
71 #include "basic-block.h"
72 #include "gimple-pretty-print.h"
73 #include "hash-map.h"
74 #include "hash-table.h"
75 #include "tree-ssa-alias.h"
76 #include "internal-fn.h"
77 #include "tree-eh.h"
78 #include "gimple-expr.h"
79 #include "is-a.h"
80 #include "gimple.h"
81 #include "gimplify.h"
82 #include "gimple-iterator.h"
83 #include "gimplify-me.h"
84 #include "gimple-ssa.h"
85 #include "cgraph.h"
86 #include "tree-cfg.h"
87 #include "tree-phinodes.h"
88 #include "ssa-iterators.h"
89 #include "stringpool.h"
90 #include "tree-ssanames.h"
91 #include "tree-ssa-loop-ivopts.h"
92 #include "tree-ssa-loop-manip.h"
93 #include "tree-ssa-loop-niter.h"
94 #include "tree-ssa-loop.h"
95 #include "expr.h"
96 #include "tree-dfa.h"
97 #include "tree-ssa.h"
98 #include "cfgloop.h"
99 #include "tree-pass.h"
100 #include "insn-config.h"
101 #include "tree-chrec.h"
102 #include "tree-scalar-evolution.h"
103 #include "cfgloop.h"
104 #include "params.h"
105 #include "langhooks.h"
106 #include "tree-affine.h"
107 #include "target.h"
108 #include "tree-inline.h"
109 #include "tree-ssa-propagate.h"
110 #include "expmed.h"
111 #include "tree-ssa-address.h"
112 #include "builtins.h"
114 /* FIXME: Expressions are expanded to RTL in this pass to determine the
115 cost of different addressing modes. This should be moved to a TBD
116 interface between the GIMPLE and RTL worlds. */
117 #include "expr.h"
118 #include "recog.h"
120 /* The infinite cost. */
121 #define INFTY 10000000
123 #define AVG_LOOP_NITER(LOOP) 5
125 /* Returns the expected number of loop iterations for LOOP.
126 The average trip count is computed from profile data if it
127 exists. */
129 static inline HOST_WIDE_INT
130 avg_loop_niter (struct loop *loop)
132 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
133 if (niter == -1)
134 return AVG_LOOP_NITER (loop);
136 return niter;
139 /* Representation of the induction variable. */
140 struct iv
142 tree base; /* Initial value of the iv. */
 143 tree base_object; /* A memory object to which the induction variable points. */
144 tree step; /* Step of the iv (constant only). */
145 tree ssa_name; /* The ssa name with the value. */
146 bool biv_p; /* Is it a biv? */
147 bool have_use_for; /* Do we already have a use for it? */
 148 unsigned use_id; /* The identifier of that use, if have_use_for is set. */
151 /* Per-ssa version information (induction variable descriptions, etc.). */
152 struct version_info
154 tree name; /* The ssa name. */
155 struct iv *iv; /* Induction variable description. */
156 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
157 an expression that is not an induction variable. */
158 bool preserve_biv; /* For the original biv, whether to preserve it. */
159 unsigned inv_id; /* Id of an invariant. */
162 /* Types of uses. */
163 enum use_type
165 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
166 USE_ADDRESS, /* Use in an address. */
167 USE_COMPARE /* Use is a compare. */
170 /* Cost of a computation. */
171 typedef struct
173 int cost; /* The runtime cost. */
174 unsigned complexity; /* The estimate of the complexity of the code for
175 the computation (in no concrete units --
176 complexity field should be larger for more
177 complex expressions and addressing modes). */
178 } comp_cost;
180 static const comp_cost no_cost = {0, 0};
181 static const comp_cost infinite_cost = {INFTY, INFTY};
183 /* The candidate - cost pair. */
184 struct cost_pair
186 struct iv_cand *cand; /* The candidate. */
187 comp_cost cost; /* The cost. */
188 bitmap depends_on; /* The list of invariants that have to be
189 preserved. */
190 tree value; /* For final value elimination, the expression for
191 the final value of the iv. For iv elimination,
192 the new bound to compare with. */
193 enum tree_code comp; /* For iv elimination, the comparison. */
194 int inv_expr_id; /* Loop invariant expression id. */
197 /* Use. */
198 struct iv_use
200 unsigned id; /* The id of the use. */
201 enum use_type type; /* Type of the use. */
202 struct iv *iv; /* The induction variable it is based on. */
203 gimple stmt; /* Statement in that it occurs. */
204 tree *op_p; /* The place where it occurs. */
205 bitmap related_cands; /* The set of "related" iv candidates, plus the common
206 important ones. */
208 unsigned n_map_members; /* Number of candidates in the cost_map list. */
209 struct cost_pair *cost_map;
 210 /* The costs w.r.t. the iv candidates. */
212 struct iv_cand *selected;
 213 /* The selected candidate. */
216 /* The position where the iv is computed. */
217 enum iv_position
219 IP_NORMAL, /* At the end, just before the exit condition. */
220 IP_END, /* At the end of the latch block. */
221 IP_BEFORE_USE, /* Immediately before a specific use. */
222 IP_AFTER_USE, /* Immediately after a specific use. */
223 IP_ORIGINAL /* The original biv. */
226 /* The induction variable candidate. */
227 struct iv_cand
229 unsigned id; /* The number of the candidate. */
230 bool important; /* Whether this is an "important" candidate, i.e. such
231 that it should be considered by all uses. */
232 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
233 gimple incremented_at;/* For original biv, the statement where it is
234 incremented. */
235 tree var_before; /* The variable used for it before increment. */
236 tree var_after; /* The variable used for it after increment. */
237 struct iv *iv; /* The value of the candidate. NULL for
238 "pseudocandidate" used to indicate the possibility
239 to replace the final value of an iv by direct
240 computation of the value. */
241 unsigned cost; /* Cost of the candidate. */
242 unsigned cost_step; /* Cost of the candidate's increment operation. */
243 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
244 where it is incremented. */
245 bitmap depends_on; /* The list of invariants that are used in step of the
246 biv. */
249 /* Loop invariant expression hashtable entry. */
250 struct iv_inv_expr_ent
252 tree expr;
253 int id;
254 hashval_t hash;
257 /* The data used by the induction variable optimizations. */
259 typedef struct iv_use *iv_use_p;
261 typedef struct iv_cand *iv_cand_p;
263 /* Hashtable helpers. */
265 struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
267 typedef iv_inv_expr_ent value_type;
268 typedef iv_inv_expr_ent compare_type;
269 static inline hashval_t hash (const value_type *);
270 static inline bool equal (const value_type *, const compare_type *);
273 /* Hash function for loop invariant expressions. */
275 inline hashval_t
276 iv_inv_expr_hasher::hash (const value_type *expr)
278 return expr->hash;
281 /* Hash table equality function for expressions. */
283 inline bool
284 iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
286 return expr1->hash == expr2->hash
287 && operand_equal_p (expr1->expr, expr2->expr, 0);
290 struct ivopts_data
292 /* The currently optimized loop. */
293 struct loop *current_loop;
295 /* Numbers of iterations for all exits of the current loop. */
296 hash_map<edge, tree_niter_desc *> *niters;
298 /* Number of registers used in it. */
299 unsigned regs_used;
301 /* The size of version_info array allocated. */
302 unsigned version_info_size;
304 /* The array of information for the ssa names. */
305 struct version_info *version_info;
307 /* The hashtable of loop invariant expressions created
308 by ivopt. */
309 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
311 /* Loop invariant expression id. */
312 int inv_expr_id;
314 /* The bitmap of indices in version_info whose value was changed. */
315 bitmap relevant;
317 /* The uses of induction variables. */
318 vec<iv_use_p> iv_uses;
320 /* The candidates. */
321 vec<iv_cand_p> iv_candidates;
323 /* A bitmap of important candidates. */
324 bitmap important_candidates;
326 /* Cache used by tree_to_aff_combination_expand. */
327 hash_map<tree, name_expansion *> *name_expansion_cache;
329 /* The maximum invariant id. */
330 unsigned max_inv_id;
332 /* Whether to consider just related and important candidates when replacing a
333 use. */
334 bool consider_all_candidates;
336 /* Are we optimizing for speed? */
337 bool speed;
339 /* Whether the loop body includes any function calls. */
340 bool body_includes_call;
342 /* Whether the loop body can only be exited via single exit. */
343 bool loop_single_exit_p;
346 /* An assignment of iv candidates to uses. */
348 struct iv_ca
350 /* The number of uses covered by the assignment. */
351 unsigned upto;
353 /* Number of uses that cannot be expressed by the candidates in the set. */
354 unsigned bad_uses;
356 /* Candidate assigned to a use, together with the related costs. */
357 struct cost_pair **cand_for_use;
359 /* Number of times each candidate is used. */
360 unsigned *n_cand_uses;
362 /* The candidates used. */
363 bitmap cands;
365 /* The number of candidates in the set. */
366 unsigned n_cands;
368 /* Total number of registers needed. */
369 unsigned n_regs;
371 /* Total cost of expressing uses. */
372 comp_cost cand_use_cost;
374 /* Total cost of candidates. */
375 unsigned cand_cost;
377 /* Number of times each invariant is used. */
378 unsigned *n_invariant_uses;
380 /* The array holding the number of uses of each loop
 381 invariant expression created by ivopt. */
382 unsigned *used_inv_expr;
384 /* The number of created loop invariants. */
385 unsigned num_used_inv_expr;
387 /* Total cost of the assignment. */
388 comp_cost cost;
391 /* Difference of two iv candidate assignments. */
393 struct iv_ca_delta
395 /* Changed use. */
396 struct iv_use *use;
398 /* An old assignment (for rollback purposes). */
399 struct cost_pair *old_cp;
401 /* A new assignment. */
402 struct cost_pair *new_cp;
404 /* Next change in the list. */
405 struct iv_ca_delta *next_change;
 408 /* Bound on number of candidates below which all candidates are considered. */
410 #define CONSIDER_ALL_CANDIDATES_BOUND \
411 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
413 /* If there are more iv occurrences, we just give up (it is quite unlikely that
414 optimizing such a loop would help, and it would take ages). */
416 #define MAX_CONSIDERED_USES \
417 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 419 /* If there are at most this number of ivs in the set, always try removing
 420 unnecessary ivs from the set. */
422 #define ALWAYS_PRUNE_CAND_SET_BOUND \
423 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 425 /* The list of trees for which the decl_rtl field must be reset is stored
426 here. */
428 static vec<tree> decl_rtl_to_reset;
430 static comp_cost force_expr_to_var_cost (tree, bool);
432 /* Number of uses recorded in DATA. */
434 static inline unsigned
435 n_iv_uses (struct ivopts_data *data)
437 return data->iv_uses.length ();
440 /* Ith use recorded in DATA. */
442 static inline struct iv_use *
443 iv_use (struct ivopts_data *data, unsigned i)
445 return data->iv_uses[i];
448 /* Number of candidates recorded in DATA. */
450 static inline unsigned
451 n_iv_cands (struct ivopts_data *data)
453 return data->iv_candidates.length ();
456 /* Ith candidate recorded in DATA. */
458 static inline struct iv_cand *
459 iv_cand (struct ivopts_data *data, unsigned i)
461 return data->iv_candidates[i];
464 /* The single loop exit if it dominates the latch, NULL otherwise. */
466 edge
467 single_dom_exit (struct loop *loop)
469 edge exit = single_exit (loop);
471 if (!exit)
472 return NULL;
474 if (!just_once_each_iteration_p (loop, exit->src))
475 return NULL;
477 return exit;
480 /* Dumps information about the induction variable IV to FILE. */
482 void
483 dump_iv (FILE *file, struct iv *iv)
485 if (iv->ssa_name)
487 fprintf (file, "ssa name ");
488 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
489 fprintf (file, "\n");
492 fprintf (file, " type ");
493 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
494 fprintf (file, "\n");
496 if (iv->step)
498 fprintf (file, " base ");
499 print_generic_expr (file, iv->base, TDF_SLIM);
500 fprintf (file, "\n");
502 fprintf (file, " step ");
503 print_generic_expr (file, iv->step, TDF_SLIM);
504 fprintf (file, "\n");
506 else
508 fprintf (file, " invariant ");
509 print_generic_expr (file, iv->base, TDF_SLIM);
510 fprintf (file, "\n");
513 if (iv->base_object)
515 fprintf (file, " base object ");
516 print_generic_expr (file, iv->base_object, TDF_SLIM);
517 fprintf (file, "\n");
520 if (iv->biv_p)
521 fprintf (file, " is a biv\n");
524 /* Dumps information about the USE to FILE. */
526 void
527 dump_use (FILE *file, struct iv_use *use)
529 fprintf (file, "use %d\n", use->id);
531 switch (use->type)
533 case USE_NONLINEAR_EXPR:
534 fprintf (file, " generic\n");
535 break;
537 case USE_ADDRESS:
538 fprintf (file, " address\n");
539 break;
541 case USE_COMPARE:
542 fprintf (file, " compare\n");
543 break;
545 default:
546 gcc_unreachable ();
549 fprintf (file, " in statement ");
550 print_gimple_stmt (file, use->stmt, 0, 0);
551 fprintf (file, "\n");
553 fprintf (file, " at position ");
554 if (use->op_p)
555 print_generic_expr (file, *use->op_p, TDF_SLIM);
556 fprintf (file, "\n");
558 dump_iv (file, use->iv);
560 if (use->related_cands)
562 fprintf (file, " related candidates ");
563 dump_bitmap (file, use->related_cands);
567 /* Dumps information about the uses to FILE. */
569 void
570 dump_uses (FILE *file, struct ivopts_data *data)
572 unsigned i;
573 struct iv_use *use;
575 for (i = 0; i < n_iv_uses (data); i++)
577 use = iv_use (data, i);
579 dump_use (file, use);
580 fprintf (file, "\n");
584 /* Dumps information about induction variable candidate CAND to FILE. */
586 void
587 dump_cand (FILE *file, struct iv_cand *cand)
589 struct iv *iv = cand->iv;
591 fprintf (file, "candidate %d%s\n",
592 cand->id, cand->important ? " (important)" : "");
594 if (cand->depends_on)
596 fprintf (file, " depends on ");
597 dump_bitmap (file, cand->depends_on);
600 if (!iv)
602 fprintf (file, " final value replacement\n");
603 return;
606 if (cand->var_before)
608 fprintf (file, " var_before ");
609 print_generic_expr (file, cand->var_before, TDF_SLIM);
610 fprintf (file, "\n");
612 if (cand->var_after)
614 fprintf (file, " var_after ");
615 print_generic_expr (file, cand->var_after, TDF_SLIM);
616 fprintf (file, "\n");
619 switch (cand->pos)
621 case IP_NORMAL:
622 fprintf (file, " incremented before exit test\n");
623 break;
625 case IP_BEFORE_USE:
626 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
627 break;
629 case IP_AFTER_USE:
630 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
631 break;
633 case IP_END:
634 fprintf (file, " incremented at end\n");
635 break;
637 case IP_ORIGINAL:
638 fprintf (file, " original biv\n");
639 break;
642 dump_iv (file, iv);
645 /* Returns the info for ssa version VER. */
647 static inline struct version_info *
648 ver_info (struct ivopts_data *data, unsigned ver)
650 return data->version_info + ver;
653 /* Returns the info for ssa name NAME. */
655 static inline struct version_info *
656 name_info (struct ivopts_data *data, tree name)
658 return ver_info (data, SSA_NAME_VERSION (name));
661 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
662 emitted in LOOP. */
664 static bool
665 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
667 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
669 gcc_assert (bb);
671 if (sbb == loop->latch)
672 return true;
674 if (sbb != bb)
675 return false;
677 return stmt == last_stmt (bb);
 680 /* Returns true if STMT is after the place where the original induction
681 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
682 if the positions are identical. */
684 static bool
685 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
687 basic_block cand_bb = gimple_bb (cand->incremented_at);
688 basic_block stmt_bb = gimple_bb (stmt);
690 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
691 return false;
693 if (stmt_bb != cand_bb)
694 return true;
696 if (true_if_equal
697 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
698 return true;
699 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 702 /* Returns true if STMT is after the place where the induction variable
703 CAND is incremented in LOOP. */
705 static bool
706 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
708 switch (cand->pos)
710 case IP_END:
711 return false;
713 case IP_NORMAL:
714 return stmt_after_ip_normal_pos (loop, stmt);
716 case IP_ORIGINAL:
717 case IP_AFTER_USE:
718 return stmt_after_inc_pos (cand, stmt, false);
720 case IP_BEFORE_USE:
721 return stmt_after_inc_pos (cand, stmt, true);
723 default:
724 gcc_unreachable ();
728 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
730 static bool
731 abnormal_ssa_name_p (tree exp)
733 if (!exp)
734 return false;
736 if (TREE_CODE (exp) != SSA_NAME)
737 return false;
739 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
742 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
743 abnormal phi node. Callback for for_each_index. */
745 static bool
746 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
747 void *data ATTRIBUTE_UNUSED)
749 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
751 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
752 return false;
753 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
754 return false;
757 return !abnormal_ssa_name_p (*index);
760 /* Returns true if EXPR contains a ssa name that occurs in an
761 abnormal phi node. */
763 bool
764 contains_abnormal_ssa_name_p (tree expr)
766 enum tree_code code;
767 enum tree_code_class codeclass;
769 if (!expr)
770 return false;
772 code = TREE_CODE (expr);
773 codeclass = TREE_CODE_CLASS (code);
775 if (code == SSA_NAME)
776 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
778 if (code == INTEGER_CST
779 || is_gimple_min_invariant (expr))
780 return false;
782 if (code == ADDR_EXPR)
783 return !for_each_index (&TREE_OPERAND (expr, 0),
784 idx_contains_abnormal_ssa_name_p,
785 NULL);
787 if (code == COND_EXPR)
788 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
789 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
790 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
792 switch (codeclass)
794 case tcc_binary:
795 case tcc_comparison:
796 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
797 return true;
799 /* Fallthru. */
800 case tcc_unary:
801 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
802 return true;
804 break;
806 default:
807 gcc_unreachable ();
810 return false;
813 /* Returns the structure describing number of iterations determined from
814 EXIT of DATA->current_loop, or NULL if something goes wrong. */
816 static struct tree_niter_desc *
817 niter_for_exit (struct ivopts_data *data, edge exit)
819 struct tree_niter_desc *desc;
820 tree_niter_desc **slot;
822 if (!data->niters)
824 data->niters = new hash_map<edge, tree_niter_desc *>;
825 slot = NULL;
827 else
828 slot = data->niters->get (exit);
830 if (!slot)
832 /* Try to determine number of iterations. We cannot safely work with ssa
833 names that appear in phi nodes on abnormal edges, so that we do not
834 create overlapping life ranges for them (PR 27283). */
835 desc = XNEW (struct tree_niter_desc);
836 if (!number_of_iterations_exit (data->current_loop,
837 exit, desc, true)
838 || contains_abnormal_ssa_name_p (desc->niter))
840 XDELETE (desc);
841 desc = NULL;
843 data->niters->put (exit, desc);
845 else
846 desc = *slot;
848 return desc;
851 /* Returns the structure describing number of iterations determined from
852 single dominating exit of DATA->current_loop, or NULL if something
853 goes wrong. */
855 static struct tree_niter_desc *
856 niter_for_single_dom_exit (struct ivopts_data *data)
858 edge exit = single_dom_exit (data->current_loop);
860 if (!exit)
861 return NULL;
863 return niter_for_exit (data, exit);
866 /* Initializes data structures used by the iv optimization pass, stored
867 in DATA. */
869 static void
870 tree_ssa_iv_optimize_init (struct ivopts_data *data)
872 data->version_info_size = 2 * num_ssa_names;
873 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
874 data->relevant = BITMAP_ALLOC (NULL);
875 data->important_candidates = BITMAP_ALLOC (NULL);
876 data->max_inv_id = 0;
877 data->niters = NULL;
878 data->iv_uses.create (20);
879 data->iv_candidates.create (20);
880 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
881 data->inv_expr_id = 0;
882 data->name_expansion_cache = NULL;
883 decl_rtl_to_reset.create (20);
 886 /* Returns a memory object to which EXPR points. In case we are able to
887 determine that it does not point to any such object, NULL is returned. */
889 static tree
890 determine_base_object (tree expr)
892 enum tree_code code = TREE_CODE (expr);
893 tree base, obj;
895 /* If this is a pointer casted to any type, we need to determine
896 the base object for the pointer; so handle conversions before
897 throwing away non-pointer expressions. */
898 if (CONVERT_EXPR_P (expr))
899 return determine_base_object (TREE_OPERAND (expr, 0));
901 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
902 return NULL_TREE;
904 switch (code)
906 case INTEGER_CST:
907 return NULL_TREE;
909 case ADDR_EXPR:
910 obj = TREE_OPERAND (expr, 0);
911 base = get_base_address (obj);
913 if (!base)
914 return expr;
916 if (TREE_CODE (base) == MEM_REF)
917 return determine_base_object (TREE_OPERAND (base, 0));
919 return fold_convert (ptr_type_node,
920 build_fold_addr_expr (base));
922 case POINTER_PLUS_EXPR:
923 return determine_base_object (TREE_OPERAND (expr, 0));
925 case PLUS_EXPR:
926 case MINUS_EXPR:
927 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
928 gcc_unreachable ();
930 default:
931 return fold_convert (ptr_type_node, expr);
 935 /* Return true if an address expression with a non-DECL_P operand appears
936 in EXPR. */
938 static bool
939 contain_complex_addr_expr (tree expr)
941 bool res = false;
943 STRIP_NOPS (expr);
944 switch (TREE_CODE (expr))
946 case POINTER_PLUS_EXPR:
947 case PLUS_EXPR:
948 case MINUS_EXPR:
949 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
950 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
951 break;
953 case ADDR_EXPR:
954 return (!DECL_P (TREE_OPERAND (expr, 0)));
956 default:
957 return false;
960 return res;
963 /* Allocates an induction variable with given initial value BASE and step STEP
964 for loop LOOP. */
966 static struct iv *
967 alloc_iv (tree base, tree step)
969 tree expr = base;
970 struct iv *iv = XCNEW (struct iv);
971 gcc_assert (step != NULL_TREE);
973 /* Lower address expression in base except ones with DECL_P as operand.
974 By doing this:
975 1) More accurate cost can be computed for address expressions;
976 2) Duplicate candidates won't be created for bases in different
977 forms, like &a[0] and &a. */
978 STRIP_NOPS (expr);
979 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
980 || contain_complex_addr_expr (expr))
982 aff_tree comb;
983 tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
984 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
987 iv->base = base;
988 iv->base_object = determine_base_object (base);
989 iv->step = step;
990 iv->biv_p = false;
991 iv->have_use_for = false;
992 iv->use_id = 0;
993 iv->ssa_name = NULL_TREE;
995 return iv;
998 /* Sets STEP and BASE for induction variable IV. */
1000 static void
1001 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
1003 struct version_info *info = name_info (data, iv);
1005 gcc_assert (!info->iv);
1007 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1008 info->iv = alloc_iv (base, step);
1009 info->iv->ssa_name = iv;
1012 /* Finds induction variable declaration for VAR. */
1014 static struct iv *
1015 get_iv (struct ivopts_data *data, tree var)
1017 basic_block bb;
1018 tree type = TREE_TYPE (var);
1020 if (!POINTER_TYPE_P (type)
1021 && !INTEGRAL_TYPE_P (type))
1022 return NULL;
1024 if (!name_info (data, var)->iv)
1026 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1028 if (!bb
1029 || !flow_bb_inside_loop_p (data->current_loop, bb))
1030 set_iv (data, var, var, build_int_cst (type, 0));
1033 return name_info (data, var)->iv;
1036 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
1037 not define a simple affine biv with nonzero step. */
1039 static tree
1040 determine_biv_step (gimple phi)
1042 struct loop *loop = gimple_bb (phi)->loop_father;
1043 tree name = PHI_RESULT (phi);
1044 affine_iv iv;
1046 if (virtual_operand_p (name))
1047 return NULL_TREE;
1049 if (!simple_iv (loop, loop, name, &iv, true))
1050 return NULL_TREE;
1052 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
1055 /* Finds basic ivs. */
1057 static bool
1058 find_bivs (struct ivopts_data *data)
1060 gimple phi;
1061 tree step, type, base;
1062 bool found = false;
1063 struct loop *loop = data->current_loop;
1064 gimple_stmt_iterator psi;
1066 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1068 phi = gsi_stmt (psi);
1070 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1071 continue;
1073 step = determine_biv_step (phi);
1074 if (!step)
1075 continue;
1077 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1078 base = expand_simple_operations (base);
1079 if (contains_abnormal_ssa_name_p (base)
1080 || contains_abnormal_ssa_name_p (step))
1081 continue;
1083 type = TREE_TYPE (PHI_RESULT (phi));
1084 base = fold_convert (type, base);
1085 if (step)
1087 if (POINTER_TYPE_P (type))
1088 step = convert_to_ptrofftype (step);
1089 else
1090 step = fold_convert (type, step);
1093 set_iv (data, PHI_RESULT (phi), base, step);
1094 found = true;
1097 return found;
1100 /* Marks basic ivs. */
1102 static void
1103 mark_bivs (struct ivopts_data *data)
1105 gimple phi, def;
1106 tree var;
1107 struct iv *iv, *incr_iv;
1108 struct loop *loop = data->current_loop;
1109 basic_block incr_bb;
1110 gimple_stmt_iterator psi;
1112 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1114 phi = gsi_stmt (psi);
1116 iv = get_iv (data, PHI_RESULT (phi));
1117 if (!iv)
1118 continue;
1120 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1121 def = SSA_NAME_DEF_STMT (var);
1122 /* Don't mark iv peeled from other one as biv. */
1123 if (def
1124 && gimple_code (def) == GIMPLE_PHI
1125 && gimple_bb (def) == loop->header)
1126 continue;
1128 incr_iv = get_iv (data, var);
1129 if (!incr_iv)
1130 continue;
1132 /* If the increment is in the subloop, ignore it. */
1133 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1134 if (incr_bb->loop_father != data->current_loop
1135 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1136 continue;
1138 iv->biv_p = true;
1139 incr_iv->biv_p = true;
1143 /* Checks whether STMT defines a linear induction variable and stores its
1144 parameters to IV. */
1146 static bool
1147 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1149 tree lhs;
1150 struct loop *loop = data->current_loop;
1152 iv->base = NULL_TREE;
1153 iv->step = NULL_TREE;
1155 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1156 return false;
1158 lhs = gimple_assign_lhs (stmt);
1159 if (TREE_CODE (lhs) != SSA_NAME)
1160 return false;
1162 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1163 return false;
1164 iv->base = expand_simple_operations (iv->base);
1166 if (contains_abnormal_ssa_name_p (iv->base)
1167 || contains_abnormal_ssa_name_p (iv->step))
1168 return false;
1170 /* If STMT could throw, then do not consider STMT as defining a GIV.
1171 While this will suppress optimizations, we can not safely delete this
1172 GIV and associated statements, even if it appears it is not used. */
1173 if (stmt_could_throw_p (stmt))
1174 return false;
1176 return true;
1179 /* Finds general ivs in statement STMT. */
1181 static void
1182 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1184 affine_iv iv;
1186 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1187 return;
1189 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1192 /* Finds general ivs in basic block BB. */
1194 static void
1195 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1197 gimple_stmt_iterator bsi;
1199 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1200 find_givs_in_stmt (data, gsi_stmt (bsi));
1203 /* Finds general ivs. */
1205 static void
1206 find_givs (struct ivopts_data *data)
1208 struct loop *loop = data->current_loop;
1209 basic_block *body = get_loop_body_in_dom_order (loop);
1210 unsigned i;
1212 for (i = 0; i < loop->num_nodes; i++)
1213 find_givs_in_bb (data, body[i]);
1214 free (body);
1217 /* For each ssa name defined in LOOP determines whether it is an induction
1218 variable and if so, its initial value and step. */
1220 static bool
1221 find_induction_variables (struct ivopts_data *data)
1223 unsigned i;
1224 bitmap_iterator bi;
1226 if (!find_bivs (data))
1227 return false;
1229 find_givs (data);
1230 mark_bivs (data);
1232 if (dump_file && (dump_flags & TDF_DETAILS))
1234 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1236 if (niter)
1238 fprintf (dump_file, " number of iterations ");
1239 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1240 if (!integer_zerop (niter->may_be_zero))
1242 fprintf (dump_file, "; zero if ");
1243 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1245 fprintf (dump_file, "\n\n");
1248 fprintf (dump_file, "Induction variables:\n\n");
1250 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1252 if (ver_info (data, i)->iv)
1253 dump_iv (dump_file, ver_info (data, i)->iv);
1257 return true;
1260 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1262 static struct iv_use *
1263 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1264 gimple stmt, enum use_type use_type)
1266 struct iv_use *use = XCNEW (struct iv_use);
1268 use->id = n_iv_uses (data);
1269 use->type = use_type;
1270 use->iv = iv;
1271 use->stmt = stmt;
1272 use->op_p = use_p;
1273 use->related_cands = BITMAP_ALLOC (NULL);
 1275 /* Clear the ssa name, to avoid showing it in the dumps, if it was not
 1276 already reset by the caller. */
1277 iv->ssa_name = NULL_TREE;
1279 if (dump_file && (dump_flags & TDF_DETAILS))
1280 dump_use (dump_file, use);
1282 data->iv_uses.safe_push (use);
1284 return use;
1287 /* Checks whether OP is a loop-level invariant and if so, records it.
1288 NONLINEAR_USE is true if the invariant is used in a way we do not
1289 handle specially. */
1291 static void
1292 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1294 basic_block bb;
1295 struct version_info *info;
1297 if (TREE_CODE (op) != SSA_NAME
1298 || virtual_operand_p (op))
1299 return;
1301 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1302 if (bb
1303 && flow_bb_inside_loop_p (data->current_loop, bb))
1304 return;
1306 info = name_info (data, op);
1307 info->name = op;
1308 info->has_nonlin_use |= nonlinear_use;
1309 if (!info->inv_id)
1310 info->inv_id = ++data->max_inv_id;
1311 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1314 /* Checks whether the use OP is interesting and if so, records it. */
1316 static struct iv_use *
1317 find_interesting_uses_op (struct ivopts_data *data, tree op)
1319 struct iv *iv;
1320 struct iv *civ;
1321 gimple stmt;
1322 struct iv_use *use;
1324 if (TREE_CODE (op) != SSA_NAME)
1325 return NULL;
1327 iv = get_iv (data, op);
1328 if (!iv)
1329 return NULL;
1331 if (iv->have_use_for)
1333 use = iv_use (data, iv->use_id);
1335 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1336 return use;
1339 if (integer_zerop (iv->step))
1341 record_invariant (data, op, true);
1342 return NULL;
1344 iv->have_use_for = true;
1346 civ = XNEW (struct iv);
1347 *civ = *iv;
1349 stmt = SSA_NAME_DEF_STMT (op);
1350 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1351 || is_gimple_assign (stmt));
1353 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1354 iv->use_id = use->id;
1356 return use;
1359 /* Given a condition in statement STMT, checks whether it is a compare
1360 of an induction variable and an invariant. If this is the case,
1361 CONTROL_VAR is set to location of the iv, BOUND to the location of
1362 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1363 induction variable descriptions, and true is returned. If this is not
1364 the case, CONTROL_VAR and BOUND are set to the arguments of the
1365 condition and false is returned. */
1367 static bool
1368 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1369 tree **control_var, tree **bound,
1370 struct iv **iv_var, struct iv **iv_bound)
1372 /* The objects returned when COND has constant operands. */
1373 static struct iv const_iv;
1374 static tree zero;
1375 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1376 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1377 bool ret = false;
1379 if (gimple_code (stmt) == GIMPLE_COND)
1381 op0 = gimple_cond_lhs_ptr (stmt);
1382 op1 = gimple_cond_rhs_ptr (stmt);
1384 else
1386 op0 = gimple_assign_rhs1_ptr (stmt);
1387 op1 = gimple_assign_rhs2_ptr (stmt);
1390 zero = integer_zero_node;
1391 const_iv.step = integer_zero_node;
1393 if (TREE_CODE (*op0) == SSA_NAME)
1394 iv0 = get_iv (data, *op0);
1395 if (TREE_CODE (*op1) == SSA_NAME)
1396 iv1 = get_iv (data, *op1);
1398 /* Exactly one of the compared values must be an iv, and the other one must
1399 be an invariant. */
1400 if (!iv0 || !iv1)
1401 goto end;
1403 if (integer_zerop (iv0->step))
1405 /* Control variable may be on the other side. */
1406 tmp_op = op0; op0 = op1; op1 = tmp_op;
1407 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1409 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1411 end:
1412 if (control_var)
 1413 *control_var = op0;
 1414 if (iv_var)
 1415 *iv_var = iv0;
1416 if (bound)
1417 *bound = op1;
1418 if (iv_bound)
1419 *iv_bound = iv1;
1421 return ret;
1424 /* Checks whether the condition in STMT is interesting and if so,
1425 records it. */
1427 static void
1428 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1430 tree *var_p, *bound_p;
1431 struct iv *var_iv, *civ;
1433 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1435 find_interesting_uses_op (data, *var_p);
1436 find_interesting_uses_op (data, *bound_p);
1437 return;
1440 civ = XNEW (struct iv);
1441 *civ = *var_iv;
1442 record_use (data, NULL, civ, stmt, USE_COMPARE);
1445 /* Returns the outermost loop EXPR is obviously invariant in
1446 relative to the loop LOOP, i.e. if all its operands are defined
1447 outside of the returned loop. Returns NULL if EXPR is not
1448 even obviously invariant in LOOP. */
1450 struct loop *
1451 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1453 basic_block def_bb;
1454 unsigned i, len;
1456 if (is_gimple_min_invariant (expr))
1457 return current_loops->tree_root;
1459 if (TREE_CODE (expr) == SSA_NAME)
1461 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1462 if (def_bb)
1464 if (flow_bb_inside_loop_p (loop, def_bb))
1465 return NULL;
1466 return superloop_at_depth (loop,
1467 loop_depth (def_bb->loop_father) + 1);
1470 return current_loops->tree_root;
1473 if (!EXPR_P (expr))
1474 return NULL;
1476 unsigned maxdepth = 0;
1477 len = TREE_OPERAND_LENGTH (expr);
1478 for (i = 0; i < len; i++)
1480 struct loop *ivloop;
1481 if (!TREE_OPERAND (expr, i))
1482 continue;
1484 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1485 if (!ivloop)
1486 return NULL;
1487 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1490 return superloop_at_depth (loop, maxdepth);
1493 /* Returns true if expression EXPR is obviously invariant in LOOP,
1494 i.e. if all its operands are defined outside of the LOOP. LOOP
1495 should not be the function body. */
1497 bool
1498 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1500 basic_block def_bb;
1501 unsigned i, len;
1503 gcc_assert (loop_depth (loop) > 0);
1505 if (is_gimple_min_invariant (expr))
1506 return true;
1508 if (TREE_CODE (expr) == SSA_NAME)
1510 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1511 if (def_bb
1512 && flow_bb_inside_loop_p (loop, def_bb))
1513 return false;
1515 return true;
1518 if (!EXPR_P (expr))
1519 return false;
1521 len = TREE_OPERAND_LENGTH (expr);
1522 for (i = 0; i < len; i++)
1523 if (TREE_OPERAND (expr, i)
1524 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1525 return false;
1527 return true;
1530 /* Cumulates the steps of indices into DATA and replaces their values with the
1531 initial ones. Returns false when the value of the index cannot be determined.
1532 Callback for for_each_index. */
1534 struct ifs_ivopts_data
1536 struct ivopts_data *ivopts_data;
1537 gimple stmt;
1538 tree step;
1541 static bool
1542 idx_find_step (tree base, tree *idx, void *data)
1544 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1545 struct iv *iv;
1546 tree step, iv_base, iv_step, lbound, off;
1547 struct loop *loop = dta->ivopts_data->current_loop;
1549 /* If base is a component ref, require that the offset of the reference
1550 be invariant. */
1551 if (TREE_CODE (base) == COMPONENT_REF)
1553 off = component_ref_field_offset (base);
1554 return expr_invariant_in_loop_p (loop, off);
1557 /* If base is array, first check whether we will be able to move the
1558 reference out of the loop (in order to take its address in strength
1559 reduction). In order for this to work we need both lower bound
1560 and step to be loop invariants. */
1561 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1563 /* Moreover, for a range, the size needs to be invariant as well. */
1564 if (TREE_CODE (base) == ARRAY_RANGE_REF
1565 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1566 return false;
1568 step = array_ref_element_size (base);
1569 lbound = array_ref_low_bound (base);
1571 if (!expr_invariant_in_loop_p (loop, step)
1572 || !expr_invariant_in_loop_p (loop, lbound))
1573 return false;
1576 if (TREE_CODE (*idx) != SSA_NAME)
1577 return true;
1579 iv = get_iv (dta->ivopts_data, *idx);
1580 if (!iv)
1581 return false;
1583 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1584 *&x[0], which is not folded and does not trigger the
1585 ARRAY_REF path below. */
1586 *idx = iv->base;
1588 if (integer_zerop (iv->step))
1589 return true;
1591 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1593 step = array_ref_element_size (base);
1595 /* We only handle addresses whose step is an integer constant. */
1596 if (TREE_CODE (step) != INTEGER_CST)
1597 return false;
1599 else
1600 /* The step for pointer arithmetics already is 1 byte. */
1601 step = size_one_node;
1603 iv_base = iv->base;
1604 iv_step = iv->step;
1605 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1606 sizetype, &iv_base, &iv_step, dta->stmt,
1607 false))
1609 /* The index might wrap. */
1610 return false;
1613 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1614 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1616 return true;
1619 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1620 object is passed to it in DATA. */
1622 static bool
1623 idx_record_use (tree base, tree *idx,
1624 void *vdata)
1626 struct ivopts_data *data = (struct ivopts_data *) vdata;
1627 find_interesting_uses_op (data, *idx);
1628 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1630 find_interesting_uses_op (data, array_ref_element_size (base));
1631 find_interesting_uses_op (data, array_ref_low_bound (base));
1633 return true;
1636 /* If we can prove that TOP = cst * BOT for some constant cst,
1637 store cst to MUL and return true. Otherwise return false.
1638 The returned value is always sign-extended, regardless of the
1639 signedness of TOP and BOT. */
1641 static bool
1642 constant_multiple_of (tree top, tree bot, widest_int *mul)
1644 tree mby;
1645 enum tree_code code;
1646 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1647 widest_int res, p0, p1;
1649 STRIP_NOPS (top);
1650 STRIP_NOPS (bot);
1652 if (operand_equal_p (top, bot, 0))
1654 *mul = 1;
1655 return true;
1658 code = TREE_CODE (top);
1659 switch (code)
1661 case MULT_EXPR:
1662 mby = TREE_OPERAND (top, 1);
1663 if (TREE_CODE (mby) != INTEGER_CST)
1664 return false;
1666 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1667 return false;
1669 *mul = wi::sext (res * wi::to_widest (mby), precision);
1670 return true;
1672 case PLUS_EXPR:
1673 case MINUS_EXPR:
1674 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1675 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1676 return false;
1678 if (code == MINUS_EXPR)
1679 p1 = -p1;
1680 *mul = wi::sext (p0 + p1, precision);
1681 return true;
1683 case INTEGER_CST:
1684 if (TREE_CODE (bot) != INTEGER_CST)
1685 return false;
1687 p0 = widest_int::from (top, SIGNED);
1688 p1 = widest_int::from (bot, SIGNED);
1689 if (p1 == 0)
1690 return false;
1691 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1692 return res == 0;
1694 default:
1695 return false;
1699 /* Return true if memory reference REF with step STEP may be unaligned. */
1701 static bool
1702 may_be_unaligned_p (tree ref, tree step)
1704 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1705 thus they are not misaligned. */
1706 if (TREE_CODE (ref) == TARGET_MEM_REF)
1707 return false;
1709 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1710 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1711 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1713 unsigned HOST_WIDE_INT bitpos;
1714 unsigned int ref_align;
1715 get_object_alignment_1 (ref, &ref_align, &bitpos);
1716 if (ref_align < align
1717 || (bitpos % align) != 0
1718 || (bitpos % BITS_PER_UNIT) != 0)
1719 return true;
1721 unsigned int trailing_zeros = tree_ctz (step);
1722 if (trailing_zeros < HOST_BITS_PER_INT
1723 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1724 return true;
1726 return false;
1729 /* Return true if EXPR may be non-addressable. */
1731 bool
1732 may_be_nonaddressable_p (tree expr)
1734 switch (TREE_CODE (expr))
1736 case TARGET_MEM_REF:
1737 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1738 target, thus they are always addressable. */
1739 return false;
1741 case COMPONENT_REF:
1742 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1743 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1745 case VIEW_CONVERT_EXPR:
1746 /* This kind of view-conversions may wrap non-addressable objects
1747 and make them look addressable. After some processing the
1748 non-addressability may be uncovered again, causing ADDR_EXPRs
1749 of inappropriate objects to be built. */
1750 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1751 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1752 return true;
1754 /* ... fall through ... */
1756 case ARRAY_REF:
1757 case ARRAY_RANGE_REF:
1758 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1760 CASE_CONVERT:
1761 return true;
1763 default:
1764 break;
1767 return false;
1770 /* Finds addresses in *OP_P inside STMT. */
1772 static void
1773 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1775 tree base = *op_p, step = size_zero_node;
1776 struct iv *civ;
1777 struct ifs_ivopts_data ifs_ivopts_data;
1779 /* Do not play with volatile memory references. A bit too conservative,
1780 perhaps, but safe. */
1781 if (gimple_has_volatile_ops (stmt))
1782 goto fail;
1784 /* Ignore bitfields for now. Not really something terribly complicated
1785 to handle. TODO. */
1786 if (TREE_CODE (base) == BIT_FIELD_REF)
1787 goto fail;
1789 base = unshare_expr (base);
1791 if (TREE_CODE (base) == TARGET_MEM_REF)
1793 tree type = build_pointer_type (TREE_TYPE (base));
1794 tree astep;
1796 if (TMR_BASE (base)
1797 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1799 civ = get_iv (data, TMR_BASE (base));
1800 if (!civ)
1801 goto fail;
1803 TMR_BASE (base) = civ->base;
1804 step = civ->step;
1806 if (TMR_INDEX2 (base)
1807 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1809 civ = get_iv (data, TMR_INDEX2 (base));
1810 if (!civ)
1811 goto fail;
1813 TMR_INDEX2 (base) = civ->base;
1814 step = civ->step;
1816 if (TMR_INDEX (base)
1817 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1819 civ = get_iv (data, TMR_INDEX (base));
1820 if (!civ)
1821 goto fail;
1823 TMR_INDEX (base) = civ->base;
1824 astep = civ->step;
1826 if (astep)
1828 if (TMR_STEP (base))
1829 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1831 step = fold_build2 (PLUS_EXPR, type, step, astep);
1835 if (integer_zerop (step))
1836 goto fail;
1837 base = tree_mem_ref_addr (type, base);
1839 else
1841 ifs_ivopts_data.ivopts_data = data;
1842 ifs_ivopts_data.stmt = stmt;
1843 ifs_ivopts_data.step = size_zero_node;
1844 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1845 || integer_zerop (ifs_ivopts_data.step))
1846 goto fail;
1847 step = ifs_ivopts_data.step;
1849 /* Check that the base expression is addressable. This needs
1850 to be done after substituting bases of IVs into it. */
1851 if (may_be_nonaddressable_p (base))
1852 goto fail;
1854 /* Moreover, on strict alignment platforms, check that it is
1855 sufficiently aligned. */
1856 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1857 goto fail;
1859 base = build_fold_addr_expr (base);
1861 /* Substituting bases of IVs into the base expression might
1862 have caused folding opportunities. */
1863 if (TREE_CODE (base) == ADDR_EXPR)
1865 tree *ref = &TREE_OPERAND (base, 0);
1866 while (handled_component_p (*ref))
1867 ref = &TREE_OPERAND (*ref, 0);
1868 if (TREE_CODE (*ref) == MEM_REF)
1870 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1871 TREE_OPERAND (*ref, 0),
1872 TREE_OPERAND (*ref, 1));
1873 if (tem)
1874 *ref = tem;
1879 civ = alloc_iv (base, step);
1880 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1881 return;
1883 fail:
1884 for_each_index (op_p, idx_record_use, data);
1887 /* Finds and records invariants used in STMT. */
1889 static void
1890 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1892 ssa_op_iter iter;
1893 use_operand_p use_p;
1894 tree op;
1896 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1898 op = USE_FROM_PTR (use_p);
1899 record_invariant (data, op, false);
1903 /* Finds interesting uses of induction variables in the statement STMT. */
1905 static void
1906 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1908 struct iv *iv;
1909 tree op, *lhs, *rhs;
1910 ssa_op_iter iter;
1911 use_operand_p use_p;
1912 enum tree_code code;
1914 find_invariants_stmt (data, stmt);
1916 if (gimple_code (stmt) == GIMPLE_COND)
1918 find_interesting_uses_cond (data, stmt);
1919 return;
1922 if (is_gimple_assign (stmt))
1924 lhs = gimple_assign_lhs_ptr (stmt);
1925 rhs = gimple_assign_rhs1_ptr (stmt);
1927 if (TREE_CODE (*lhs) == SSA_NAME)
1929 /* If the statement defines an induction variable, the uses are not
1930 interesting by themselves. */
1932 iv = get_iv (data, *lhs);
1934 if (iv && !integer_zerop (iv->step))
1935 return;
1938 code = gimple_assign_rhs_code (stmt);
1939 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1940 && (REFERENCE_CLASS_P (*rhs)
1941 || is_gimple_val (*rhs)))
1943 if (REFERENCE_CLASS_P (*rhs))
1944 find_interesting_uses_address (data, stmt, rhs);
1945 else
1946 find_interesting_uses_op (data, *rhs);
1948 if (REFERENCE_CLASS_P (*lhs))
1949 find_interesting_uses_address (data, stmt, lhs);
1950 return;
1952 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1954 find_interesting_uses_cond (data, stmt);
1955 return;
1958 /* TODO -- we should also handle address uses of type
1960 memory = call (whatever);
1964 call (memory). */
1967 if (gimple_code (stmt) == GIMPLE_PHI
1968 && gimple_bb (stmt) == data->current_loop->header)
1970 iv = get_iv (data, PHI_RESULT (stmt));
1972 if (iv && !integer_zerop (iv->step))
1973 return;
1976 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1978 op = USE_FROM_PTR (use_p);
1980 if (TREE_CODE (op) != SSA_NAME)
1981 continue;
1983 iv = get_iv (data, op);
1984 if (!iv)
1985 continue;
1987 find_interesting_uses_op (data, op);
1991 /* Finds interesting uses of induction variables outside of loops
1992 on loop exit edge EXIT. */
1994 static void
1995 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1997 gimple phi;
1998 gimple_stmt_iterator psi;
1999 tree def;
2001 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2003 phi = gsi_stmt (psi);
2004 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2005 if (!virtual_operand_p (def))
2006 find_interesting_uses_op (data, def);
2010 /* Finds uses of the induction variables that are interesting. */
2012 static void
2013 find_interesting_uses (struct ivopts_data *data)
2015 basic_block bb;
2016 gimple_stmt_iterator bsi;
2017 basic_block *body = get_loop_body (data->current_loop);
2018 unsigned i;
2019 struct version_info *info;
2020 edge e;
2022 if (dump_file && (dump_flags & TDF_DETAILS))
2023 fprintf (dump_file, "Uses:\n\n");
2025 for (i = 0; i < data->current_loop->num_nodes; i++)
2027 edge_iterator ei;
2028 bb = body[i];
2030 FOR_EACH_EDGE (e, ei, bb->succs)
2031 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2032 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2033 find_interesting_uses_outside (data, e);
2035 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2036 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2037 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2038 if (!is_gimple_debug (gsi_stmt (bsi)))
2039 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2042 if (dump_file && (dump_flags & TDF_DETAILS))
2044 bitmap_iterator bi;
2046 fprintf (dump_file, "\n");
2048 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2050 info = ver_info (data, i);
2051 if (info->inv_id)
2053 fprintf (dump_file, " ");
2054 print_generic_expr (dump_file, info->name, TDF_SLIM);
2055 fprintf (dump_file, " is invariant (%d)%s\n",
2056 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2060 fprintf (dump_file, "\n");
2063 free (body);
2066 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2067 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2068 we are at the top-level of the processed address. */
2070 static tree
2071 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2072 HOST_WIDE_INT *offset)
2074 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2075 enum tree_code code;
2076 tree type, orig_type = TREE_TYPE (expr);
2077 HOST_WIDE_INT off0, off1, st;
2078 tree orig_expr = expr;
2080 STRIP_NOPS (expr);
2082 type = TREE_TYPE (expr);
2083 code = TREE_CODE (expr);
2084 *offset = 0;
2086 switch (code)
2088 case INTEGER_CST:
2089 if (!cst_and_fits_in_hwi (expr)
2090 || integer_zerop (expr))
2091 return orig_expr;
2093 *offset = int_cst_value (expr);
2094 return build_int_cst (orig_type, 0);
2096 case POINTER_PLUS_EXPR:
2097 case PLUS_EXPR:
2098 case MINUS_EXPR:
2099 op0 = TREE_OPERAND (expr, 0);
2100 op1 = TREE_OPERAND (expr, 1);
2102 op0 = strip_offset_1 (op0, false, false, &off0);
2103 op1 = strip_offset_1 (op1, false, false, &off1);
2105 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2106 if (op0 == TREE_OPERAND (expr, 0)
2107 && op1 == TREE_OPERAND (expr, 1))
2108 return orig_expr;
2110 if (integer_zerop (op1))
2111 expr = op0;
2112 else if (integer_zerop (op0))
2114 if (code == MINUS_EXPR)
2115 expr = fold_build1 (NEGATE_EXPR, type, op1);
2116 else
2117 expr = op1;
2119 else
2120 expr = fold_build2 (code, type, op0, op1);
2122 return fold_convert (orig_type, expr);
2124 case MULT_EXPR:
2125 op1 = TREE_OPERAND (expr, 1);
2126 if (!cst_and_fits_in_hwi (op1))
2127 return orig_expr;
2129 op0 = TREE_OPERAND (expr, 0);
2130 op0 = strip_offset_1 (op0, false, false, &off0);
2131 if (op0 == TREE_OPERAND (expr, 0))
2132 return orig_expr;
2134 *offset = off0 * int_cst_value (op1);
2135 if (integer_zerop (op0))
2136 expr = op0;
2137 else
2138 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2140 return fold_convert (orig_type, expr);
2142 case ARRAY_REF:
2143 case ARRAY_RANGE_REF:
2144 if (!inside_addr)
2145 return orig_expr;
2147 step = array_ref_element_size (expr);
2148 if (!cst_and_fits_in_hwi (step))
2149 break;
2151 st = int_cst_value (step);
2152 op1 = TREE_OPERAND (expr, 1);
2153 op1 = strip_offset_1 (op1, false, false, &off1);
2154 *offset = off1 * st;
2156 if (top_compref
2157 && integer_zerop (op1))
2159 /* Strip the component reference completely. */
2160 op0 = TREE_OPERAND (expr, 0);
2161 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2162 *offset += off0;
2163 return op0;
2165 break;
2167 case COMPONENT_REF:
2169 tree field;
2171 if (!inside_addr)
2172 return orig_expr;
2174 tmp = component_ref_field_offset (expr);
2175 field = TREE_OPERAND (expr, 1);
2176 if (top_compref
2177 && cst_and_fits_in_hwi (tmp)
2178 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2180 HOST_WIDE_INT boffset, abs_off;
2182 /* Strip the component reference completely. */
2183 op0 = TREE_OPERAND (expr, 0);
2184 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2185 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2186 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2187 if (boffset < 0)
2188 abs_off = -abs_off;
2190 *offset = off0 + int_cst_value (tmp) + abs_off;
2191 return op0;
2194 break;
2196 case ADDR_EXPR:
2197 op0 = TREE_OPERAND (expr, 0);
2198 op0 = strip_offset_1 (op0, true, true, &off0);
2199 *offset += off0;
2201 if (op0 == TREE_OPERAND (expr, 0))
2202 return orig_expr;
2204 expr = build_fold_addr_expr (op0);
2205 return fold_convert (orig_type, expr);
2207 case MEM_REF:
2208 /* ??? Offset operand? */
2209 inside_addr = false;
2210 break;
2212 default:
2213 return orig_expr;
2216 /* Default handling of expressions for which we want to recurse into
2217 the first operand. */
2218 op0 = TREE_OPERAND (expr, 0);
2219 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2220 *offset += off0;
2222 if (op0 == TREE_OPERAND (expr, 0)
2223 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2224 return orig_expr;
2226 expr = copy_node (expr);
2227 TREE_OPERAND (expr, 0) = op0;
2228 if (op1)
2229 TREE_OPERAND (expr, 1) = op1;
2231 /* Inside an address, we might strip the top-level component references,
2232 thus changing the type of the expression. Handling of ADDR_EXPR
2233 will fix that. */
2234 expr = fold_convert (orig_type, expr);
2236 return expr;
2239 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2241 static tree
2242 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2244 HOST_WIDE_INT off;
2245 tree core = strip_offset_1 (expr, false, false, &off);
2246 *offset = off;
2247 return core;
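/* For illustration (assuming 4-byte array elements, an example not taken
   from the surrounding code): stripping offsets from the address
   &a[i + 3] yields the core expression &a[i] together with *OFFSET == 12,
   since the constant part of the index is folded into an element-size
   multiple.  Uses that differ only in such constants can then share a
   single candidate.  */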
2250 /* Returns a variant of TYPE that can be used as the base for different uses.
2251 We return an unsigned type with the same precision, which avoids problems
2252 with overflows. */
2254 static tree
2255 generic_type_for (tree type)
2257 if (POINTER_TYPE_P (type))
2258 return unsigned_type_for (type);
2260 if (TYPE_UNSIGNED (type))
2261 return type;
2263 return unsigned_type_for (type);
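/* For example, generic_type_for turns "int" into "unsigned int" and a
   pointer type into the unsigned integer type of the same precision,
   while an already unsigned type is returned unchanged.  */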
2266 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2267 the bitmap to which we should store it. */
2269 static struct ivopts_data *fd_ivopts_data;
2270 static tree
2271 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2273 bitmap *depends_on = (bitmap *) data;
2274 struct version_info *info;
2276 if (TREE_CODE (*expr_p) != SSA_NAME)
2277 return NULL_TREE;
2278 info = name_info (fd_ivopts_data, *expr_p);
2280 if (!info->inv_id || info->has_nonlin_use)
2281 return NULL_TREE;
2283 if (!*depends_on)
2284 *depends_on = BITMAP_ALLOC (NULL);
2285 bitmap_set_bit (*depends_on, info->inv_id);
2287 return NULL_TREE;
2290 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2291 position to POS. If USE is not NULL, the candidate is set as related to
2292 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2293 replacement of the final value of the iv by a direct computation. */
2295 static struct iv_cand *
2296 add_candidate_1 (struct ivopts_data *data,
2297 tree base, tree step, bool important, enum iv_position pos,
2298 struct iv_use *use, gimple incremented_at)
2300 unsigned i;
2301 struct iv_cand *cand = NULL;
2302 tree type, orig_type;
2304 /* For non-original variables, make sure their values are computed in a type
2305 that does not invoke undefined behavior on overflows (since in general,
2306 we cannot prove that these induction variables are non-wrapping). */
2307 if (pos != IP_ORIGINAL)
2309 orig_type = TREE_TYPE (base);
2310 type = generic_type_for (orig_type);
2311 if (type != orig_type)
2313 base = fold_convert (type, base);
2314 step = fold_convert (type, step);
2318 for (i = 0; i < n_iv_cands (data); i++)
2320 cand = iv_cand (data, i);
2322 if (cand->pos != pos)
2323 continue;
2325 if (cand->incremented_at != incremented_at
2326 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2327 && cand->ainc_use != use))
2328 continue;
2330 if (!cand->iv)
2332 if (!base && !step)
2333 break;
2335 continue;
2338 if (!base && !step)
2339 continue;
2341 if (operand_equal_p (base, cand->iv->base, 0)
2342 && operand_equal_p (step, cand->iv->step, 0)
2343 && (TYPE_PRECISION (TREE_TYPE (base))
2344 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2345 break;
2348 if (i == n_iv_cands (data))
2350 cand = XCNEW (struct iv_cand);
2351 cand->id = i;
2353 if (!base && !step)
2354 cand->iv = NULL;
2355 else
2356 cand->iv = alloc_iv (base, step);
2358 cand->pos = pos;
2359 if (pos != IP_ORIGINAL && cand->iv)
2361 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2362 cand->var_after = cand->var_before;
2364 cand->important = important;
2365 cand->incremented_at = incremented_at;
2366 data->iv_candidates.safe_push (cand);
2368 if (step
2369 && TREE_CODE (step) != INTEGER_CST)
2371 fd_ivopts_data = data;
2372 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2375 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2376 cand->ainc_use = use;
2377 else
2378 cand->ainc_use = NULL;
2380 if (dump_file && (dump_flags & TDF_DETAILS))
2381 dump_cand (dump_file, cand);
2384 if (important && !cand->important)
2386 cand->important = true;
2387 if (dump_file && (dump_flags & TDF_DETAILS))
2388 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2391 if (use)
2393 bitmap_set_bit (use->related_cands, i);
2394 if (dump_file && (dump_flags & TDF_DETAILS))
2395 fprintf (dump_file, "Candidate %d is related to use %d\n",
2396 cand->id, use->id);
2399 return cand;
2402 /* Returns true if incrementing the induction variable at the end of the LOOP
2403 is allowed.
2405 The purpose is to avoid splitting the latch edge with a biv increment, thus
2406 creating a jump, possibly confusing other optimization passes and leaving
2407 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2408 is not available (so we do not have a better alternative), or if the latch
2409 edge is already nonempty. */
2411 static bool
2412 allow_ip_end_pos_p (struct loop *loop)
2414 if (!ip_normal_pos (loop))
2415 return true;
2417 if (!empty_block_p (ip_end_pos (loop)))
2418 return true;
2420 return false;
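/* In other words: when a normal position before the exit test exists and
   the block at the end of the latch is still empty, IP_END is refused,
   since placing the increment there would turn the empty latch block into
   real code and thus an extra jump; a latch block that already contains
   statements loses nothing, so IP_END is allowed as well.  */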
2423 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2424 Important field is set to IMPORTANT. */
2426 static void
2427 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2428 bool important, struct iv_use *use)
2430 basic_block use_bb = gimple_bb (use->stmt);
2431 enum machine_mode mem_mode;
2432 unsigned HOST_WIDE_INT cstepi;
2434 /* If we insert the increment in any position other than the standard
2435 ones, we must ensure that it is incremented once per iteration.
2436 It must not be in an inner nested loop, or one side of an if
2437 statement. */
2438 if (use_bb->loop_father != data->current_loop
2439 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2440 || stmt_could_throw_p (use->stmt)
2441 || !cst_and_fits_in_hwi (step))
2442 return;
2444 cstepi = int_cst_value (step);
2446 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2447 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2448 || USE_STORE_PRE_INCREMENT (mem_mode))
2449 && GET_MODE_SIZE (mem_mode) == cstepi)
2450 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2451 || USE_STORE_PRE_DECREMENT (mem_mode))
2452 && GET_MODE_SIZE (mem_mode) == -cstepi))
2454 enum tree_code code = MINUS_EXPR;
2455 tree new_base;
2456 tree new_step = step;
2458 if (POINTER_TYPE_P (TREE_TYPE (base)))
2460 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2461 code = POINTER_PLUS_EXPR;
2463 else
2464 new_step = fold_convert (TREE_TYPE (base), new_step);
2465 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2466 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2467 use->stmt);
2469 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2470 || USE_STORE_POST_INCREMENT (mem_mode))
2471 && GET_MODE_SIZE (mem_mode) == cstepi)
2472 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2473 || USE_STORE_POST_DECREMENT (mem_mode))
2474 && GET_MODE_SIZE (mem_mode) == -cstepi))
2476 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2477 use->stmt);
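/* Illustrative case (a hypothetical target providing pre/post-increment
   addressing for 4-byte accesses): for a use *p with step 4, the code
   above adds an IP_BEFORE_USE candidate with base "base - 4" and/or an
   IP_AFTER_USE candidate with base "base", so the access can later be
   rewritten in the spirit of *(++p) or *(p++) respectively.  */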
2481 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2482 position to POS. If USE is not NULL, the candidate is set as related to
2483 it. The candidate computation is scheduled at all available positions. */
2485 static void
2486 add_candidate (struct ivopts_data *data,
2487 tree base, tree step, bool important, struct iv_use *use)
2489 if (ip_normal_pos (data->current_loop))
2490 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2491 if (ip_end_pos (data->current_loop)
2492 && allow_ip_end_pos_p (data->current_loop))
2493 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2495 if (use != NULL && use->type == USE_ADDRESS)
2496 add_autoinc_candidates (data, base, step, important, use);
2499 /* Adds standard iv candidates. */
2501 static void
2502 add_standard_iv_candidates (struct ivopts_data *data)
2504 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2506 /* The same for a double-integer type if it is still fast enough. */
2507 if (TYPE_PRECISION
2508 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2509 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2510 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2511 build_int_cst (long_integer_type_node, 1), true, NULL);
2513 /* The same for a double-integer type if it is still fast enough. */
2514 if (TYPE_PRECISION
2515 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2516 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2517 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2518 build_int_cst (long_long_integer_type_node, 1), true, NULL);
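/* For instance, on a typical LP64 target (32-bit int, 64-bit long,
   BITS_PER_WORD == 64) this adds the candidates {base 0, step 1} in
   "int" and in "long"; "long long" has the same precision as "long"
   there, so no third candidate is added.  */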
2522 /* Adds candidates based on the old induction variable IV. */
2524 static void
2525 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2527 gimple phi;
2528 tree def;
2529 struct iv_cand *cand;
2531 add_candidate (data, iv->base, iv->step, true, NULL);
2533 /* The same, but with initial value zero. */
2534 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2535 add_candidate (data, size_int (0), iv->step, true, NULL);
2536 else
2537 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2538 iv->step, true, NULL);
2540 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2541 if (gimple_code (phi) == GIMPLE_PHI)
2543 /* Additionally record the possibility of leaving the original iv
2544 untouched. */
2545 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2546 /* Don't add candidate if it's from another PHI node because
2547 it's an affine iv appearing in the form of PEELED_CHREC. */
2548 phi = SSA_NAME_DEF_STMT (def);
2549 if (gimple_code (phi) != GIMPLE_PHI)
2551 cand = add_candidate_1 (data,
2552 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2553 SSA_NAME_DEF_STMT (def));
2554 cand->var_before = iv->ssa_name;
2555 cand->var_after = def;
2557 else
2558 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2562 /* Adds candidates based on the old induction variables. */
2564 static void
2565 add_old_ivs_candidates (struct ivopts_data *data)
2567 unsigned i;
2568 struct iv *iv;
2569 bitmap_iterator bi;
2571 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2573 iv = ver_info (data, i)->iv;
2574 if (iv && iv->biv_p && !integer_zerop (iv->step))
2575 add_old_iv_candidates (data, iv);
2579 /* Adds candidates based on the value of the induction variable IV and USE. */
2581 static void
2582 add_iv_value_candidates (struct ivopts_data *data,
2583 struct iv *iv, struct iv_use *use)
2585 unsigned HOST_WIDE_INT offset;
2586 tree base;
2587 tree basetype;
2589 add_candidate (data, iv->base, iv->step, false, use);
2591 /* The same, but with the initial value zero. Make such a variable important,
2592 since it is generic enough that possibly many uses may be based
2593 on it. */
2594 basetype = TREE_TYPE (iv->base);
2595 if (POINTER_TYPE_P (basetype))
2596 basetype = sizetype;
2597 add_candidate (data, build_int_cst (basetype, 0),
2598 iv->step, true, use);
2600 /* Third, try removing the constant offset. Make sure to even
2601 add a candidate for &a[0] vs. (T *)&a. */
2602 base = strip_offset (iv->base, &offset);
2603 if (offset
2604 || base != iv->base)
2605 add_candidate (data, base, iv->step, false, use);
2608 /* Adds candidates based on the uses. */
2610 static void
2611 add_derived_ivs_candidates (struct ivopts_data *data)
2613 unsigned i;
2615 for (i = 0; i < n_iv_uses (data); i++)
2617 struct iv_use *use = iv_use (data, i);
2619 if (!use)
2620 continue;
2622 switch (use->type)
2624 case USE_NONLINEAR_EXPR:
2625 case USE_COMPARE:
2626 case USE_ADDRESS:
2627 /* Just add the ivs based on the value of the iv used here. */
2628 add_iv_value_candidates (data, use->iv, use);
2629 break;
2631 default:
2632 gcc_unreachable ();
2637 /* Record important candidates and add them to related_cands bitmaps
2638 if needed. */
2640 static void
2641 record_important_candidates (struct ivopts_data *data)
2643 unsigned i;
2644 struct iv_use *use;
2646 for (i = 0; i < n_iv_cands (data); i++)
2648 struct iv_cand *cand = iv_cand (data, i);
2650 if (cand->important)
2651 bitmap_set_bit (data->important_candidates, i);
2654 data->consider_all_candidates = (n_iv_cands (data)
2655 <= CONSIDER_ALL_CANDIDATES_BOUND);
2657 if (data->consider_all_candidates)
2659 /* We will not need "related_cands" bitmaps in this case,
2660 so release them to decrease peak memory consumption. */
2661 for (i = 0; i < n_iv_uses (data); i++)
2663 use = iv_use (data, i);
2664 BITMAP_FREE (use->related_cands);
2667 else
2669 /* Add important candidates to the related_cands bitmaps. */
2670 for (i = 0; i < n_iv_uses (data); i++)
2671 bitmap_ior_into (iv_use (data, i)->related_cands,
2672 data->important_candidates);
2676 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2677 If consider_all_candidates is true, we use a two-dimensional array; otherwise
2678 we allocate a simple list for every use. */
2680 static void
2681 alloc_use_cost_map (struct ivopts_data *data)
2683 unsigned i, size, s;
2685 for (i = 0; i < n_iv_uses (data); i++)
2687 struct iv_use *use = iv_use (data, i);
2689 if (data->consider_all_candidates)
2690 size = n_iv_cands (data);
2691 else
2693 s = bitmap_count_bits (use->related_cands);
2695 /* Round up to the nearest power of two, so that taking the modulus by it is fast. */
2696 size = s ? (1 << ceil_log2 (s)) : 1;
2699 use->n_map_members = size;
2700 use->cost_map = XCNEWVEC (struct cost_pair, size);
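/* Example of the sizing: with 5 related candidates we allocate
   1 << ceil_log2 (5) == 8 slots, so that a candidate id can be mapped to
   a slot with the cheap masking operation "id & (8 - 1)" instead of a
   division-based modulo (see set_use_iv_cost and get_use_iv_cost).  */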
2704 /* Returns a description of the computation cost of an expression whose runtime
2705 cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2707 static comp_cost
2708 new_cost (unsigned runtime, unsigned complexity)
2710 comp_cost cost;
2712 cost.cost = runtime;
2713 cost.complexity = complexity;
2715 return cost;
2718 /* Adds costs COST1 and COST2. */
2720 static comp_cost
2721 add_costs (comp_cost cost1, comp_cost cost2)
2723 cost1.cost += cost2.cost;
2724 cost1.complexity += cost2.complexity;
2726 return cost1;
2728 /* Subtracts cost COST2 from cost COST1. */
2730 static comp_cost
2731 sub_costs (comp_cost cost1, comp_cost cost2)
2733 cost1.cost -= cost2.cost;
2734 cost1.complexity -= cost2.complexity;
2736 return cost1;
2739 /* Returns a negative number if COST1 < COST2, a positive number if
2740 COST1 > COST2, and 0 if COST1 = COST2. */
2742 static int
2743 compare_costs (comp_cost cost1, comp_cost cost2)
2745 if (cost1.cost == cost2.cost)
2746 return cost1.complexity - cost2.complexity;
2748 return cost1.cost - cost2.cost;
2751 /* Returns true if COST is infinite. */
2753 static bool
2754 infinite_cost_p (comp_cost cost)
2756 return cost.cost == INFTY;
2759 /* Sets the cost of the (USE, CANDIDATE) pair to COST, records that it depends
2760 on the invariants DEPENDS_ON, that the value used in expressing it
2761 is VALUE, and that in case of iv elimination the comparison operator is COMP. */
2763 static void
2764 set_use_iv_cost (struct ivopts_data *data,
2765 struct iv_use *use, struct iv_cand *cand,
2766 comp_cost cost, bitmap depends_on, tree value,
2767 enum tree_code comp, int inv_expr_id)
2769 unsigned i, s;
2771 if (infinite_cost_p (cost))
2773 BITMAP_FREE (depends_on);
2774 return;
2777 if (data->consider_all_candidates)
2779 use->cost_map[cand->id].cand = cand;
2780 use->cost_map[cand->id].cost = cost;
2781 use->cost_map[cand->id].depends_on = depends_on;
2782 use->cost_map[cand->id].value = value;
2783 use->cost_map[cand->id].comp = comp;
2784 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2785 return;
2788 /* n_map_members is a power of two, so this computes modulo. */
2789 s = cand->id & (use->n_map_members - 1);
2790 for (i = s; i < use->n_map_members; i++)
2791 if (!use->cost_map[i].cand)
2792 goto found;
2793 for (i = 0; i < s; i++)
2794 if (!use->cost_map[i].cand)
2795 goto found;
2797 gcc_unreachable ();
2799 found:
2800 use->cost_map[i].cand = cand;
2801 use->cost_map[i].cost = cost;
2802 use->cost_map[i].depends_on = depends_on;
2803 use->cost_map[i].value = value;
2804 use->cost_map[i].comp = comp;
2805 use->cost_map[i].inv_expr_id = inv_expr_id;
2808 /* Gets cost of (USE, CANDIDATE) pair. */
2810 static struct cost_pair *
2811 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2812 struct iv_cand *cand)
2814 unsigned i, s;
2815 struct cost_pair *ret;
2817 if (!cand)
2818 return NULL;
2820 if (data->consider_all_candidates)
2822 ret = use->cost_map + cand->id;
2823 if (!ret->cand)
2824 return NULL;
2826 return ret;
2829 /* n_map_members is a power of two, so this computes modulo. */
2830 s = cand->id & (use->n_map_members - 1);
2831 for (i = s; i < use->n_map_members; i++)
2832 if (use->cost_map[i].cand == cand)
2833 return use->cost_map + i;
2834 else if (use->cost_map[i].cand == NULL)
2835 return NULL;
2836 for (i = 0; i < s; i++)
2837 if (use->cost_map[i].cand == cand)
2838 return use->cost_map + i;
2839 else if (use->cost_map[i].cand == NULL)
2840 return NULL;
2842 return NULL;
2845 /* Produces DECL_RTL for the object OBJ so it looks like it is stored in memory. */
2846 static rtx
2847 produce_memory_decl_rtl (tree obj, int *regno)
2849 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2850 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2851 rtx x;
2853 gcc_assert (obj);
2854 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2856 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2857 x = gen_rtx_SYMBOL_REF (address_mode, name);
2858 SET_SYMBOL_REF_DECL (x, obj);
2859 x = gen_rtx_MEM (DECL_MODE (obj), x);
2860 set_mem_addr_space (x, as);
2861 targetm.encode_section_info (obj, x, true);
2863 else
2865 x = gen_raw_REG (address_mode, (*regno)++);
2866 x = gen_rtx_MEM (DECL_MODE (obj), x);
2867 set_mem_addr_space (x, as);
2870 return x;
2873 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
2874 walk_tree. DATA contains the current fake register number. */
2876 static tree
2877 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2879 tree obj = NULL_TREE;
2880 rtx x = NULL_RTX;
2881 int *regno = (int *) data;
2883 switch (TREE_CODE (*expr_p))
2885 case ADDR_EXPR:
2886 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2887 handled_component_p (*expr_p);
2888 expr_p = &TREE_OPERAND (*expr_p, 0))
2889 continue;
2890 obj = *expr_p;
2891 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2892 x = produce_memory_decl_rtl (obj, regno);
2893 break;
2895 case SSA_NAME:
2896 *ws = 0;
2897 obj = SSA_NAME_VAR (*expr_p);
2898 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2899 if (!obj)
2900 return NULL_TREE;
2901 if (!DECL_RTL_SET_P (obj))
2902 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2903 break;
2905 case VAR_DECL:
2906 case PARM_DECL:
2907 case RESULT_DECL:
2908 *ws = 0;
2909 obj = *expr_p;
2911 if (DECL_RTL_SET_P (obj))
2912 break;
2914 if (DECL_MODE (obj) == BLKmode)
2915 x = produce_memory_decl_rtl (obj, regno);
2916 else
2917 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2919 break;
2921 default:
2922 break;
2925 if (x)
2927 decl_rtl_to_reset.safe_push (obj);
2928 SET_DECL_RTL (obj, x);
2931 return NULL_TREE;
2934 /* Determines cost of the computation of EXPR. */
2936 static unsigned
2937 computation_cost (tree expr, bool speed)
2939 rtx_insn *seq;
2940 rtx rslt;
2941 tree type = TREE_TYPE (expr);
2942 unsigned cost;
2943 /* Avoid using hard regs in ways which may be unsupported. */
2944 int regno = LAST_VIRTUAL_REGISTER + 1;
2945 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2946 enum node_frequency real_frequency = node->frequency;
2948 node->frequency = NODE_FREQUENCY_NORMAL;
2949 crtl->maybe_hot_insn_p = speed;
2950 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2951 start_sequence ();
2952 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2953 seq = get_insns ();
2954 end_sequence ();
2955 default_rtl_profile ();
2956 node->frequency = real_frequency;
2958 cost = seq_cost (seq, speed);
2959 if (MEM_P (rslt))
2960 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2961 TYPE_ADDR_SPACE (type), speed);
2962 else if (!REG_P (rslt))
2963 cost += set_src_cost (rslt, speed);
2965 return cost;
2968 /* Returns variable containing the value of candidate CAND at statement AT. */
2970 static tree
2971 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2973 if (stmt_after_increment (loop, cand, stmt))
2974 return cand->var_after;
2975 else
2976 return cand->var_before;
2979 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2980 same precision that is at least as wide as the precision of TYPE, stores
2981 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2982 type of A and B. */
2984 static tree
2985 determine_common_wider_type (tree *a, tree *b)
2987 tree wider_type = NULL;
2988 tree suba, subb;
2989 tree atype = TREE_TYPE (*a);
2991 if (CONVERT_EXPR_P (*a))
2993 suba = TREE_OPERAND (*a, 0);
2994 wider_type = TREE_TYPE (suba);
2995 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2996 return atype;
2998 else
2999 return atype;
3001 if (CONVERT_EXPR_P (*b))
3003 subb = TREE_OPERAND (*b, 0);
3004 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3005 return atype;
3007 else
3008 return atype;
3010 *a = suba;
3011 *b = subb;
3012 return wider_type;
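/* For example, if *A is (int) la and *B is (int) lb, with "la" and "lb"
   (example names) of type "long" (wider than "int"), this replaces *A and
   *B by the untruncated "la" and "lb" and returns "long"; computing
   la - lb in the wider type folds better than (int) la - (int) lb.  */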
3015 /* Determines the expression by which USE is expressed from induction variable
3016 CAND at statement AT in LOOP. The expression is stored in a decomposed
3017 form into AFF. Returns false if USE cannot be expressed using CAND. */
3019 static bool
3020 get_computation_aff (struct loop *loop,
3021 struct iv_use *use, struct iv_cand *cand, gimple at,
3022 struct aff_tree *aff)
3024 tree ubase = use->iv->base;
3025 tree ustep = use->iv->step;
3026 tree cbase = cand->iv->base;
3027 tree cstep = cand->iv->step, cstep_common;
3028 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3029 tree common_type, var;
3030 tree uutype;
3031 aff_tree cbase_aff, var_aff;
3032 widest_int rat;
3034 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3036 /* We do not have enough precision to express the values of the use. */
3037 return false;
3040 var = var_at_stmt (loop, cand, at);
3041 uutype = unsigned_type_for (utype);
3043 /* If the conversion is not noop, perform it. */
3044 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3046 cstep = fold_convert (uutype, cstep);
3047 cbase = fold_convert (uutype, cbase);
3048 var = fold_convert (uutype, var);
3051 if (!constant_multiple_of (ustep, cstep, &rat))
3052 return false;
3054 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3055 type, we achieve better folding by computing their difference in this
3056 wider type and casting the result to UUTYPE. We do not need to worry about
3057 overflows, as all the arithmetic will in the end be performed in UUTYPE
3058 anyway. */
3059 common_type = determine_common_wider_type (&ubase, &cbase);
3061 /* use = ubase - ratio * cbase + ratio * var. */
3062 tree_to_aff_combination (ubase, common_type, aff);
3063 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3064 tree_to_aff_combination (var, uutype, &var_aff);
3066 /* We need to shift the value if we are after the increment. */
3067 if (stmt_after_increment (loop, cand, at))
3069 aff_tree cstep_aff;
3071 if (common_type != uutype)
3072 cstep_common = fold_convert (common_type, cstep);
3073 else
3074 cstep_common = cstep;
3076 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3077 aff_combination_add (&cbase_aff, &cstep_aff);
3080 aff_combination_scale (&cbase_aff, -rat);
3081 aff_combination_add (aff, &cbase_aff);
3082 if (common_type != uutype)
3083 aff_combination_convert (aff, uutype);
3085 aff_combination_scale (&var_aff, rat);
3086 aff_combination_add (aff, &var_aff);
3088 return true;
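/* Worked example: for a use with iv {base = b + 4, step = 4} and a
   candidate with iv {base = 0, step = 1}, constant_multiple_of yields
   ratio == 4 and the use is expressed as

     use = (b + 4) - 4 * 0 + 4 * var = b + 4 + 4 * var,

   assembled here as an affine combination ("b" is just an example
   invariant base).  */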
3091 /* Return the type of USE. */
3093 static tree
3094 get_use_type (struct iv_use *use)
3096 tree base_type = TREE_TYPE (use->iv->base);
3097 tree type;
3099 if (use->type == USE_ADDRESS)
3101 /* The base_type may be a void pointer. Create a pointer type based on
3102 the mem_ref instead. */
3103 type = build_pointer_type (TREE_TYPE (*use->op_p));
3104 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3105 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3107 else
3108 type = base_type;
3110 return type;
3113 /* Determines the expression by which USE is expressed from induction variable
3114 CAND at statement AT in LOOP. The computation is unshared. */
3116 static tree
3117 get_computation_at (struct loop *loop,
3118 struct iv_use *use, struct iv_cand *cand, gimple at)
3120 aff_tree aff;
3121 tree type = get_use_type (use);
3123 if (!get_computation_aff (loop, use, cand, at, &aff))
3124 return NULL_TREE;
3125 unshare_aff_combination (&aff);
3126 return fold_convert (type, aff_combination_to_tree (&aff));
3129 /* Determines the expression by which USE is expressed from induction variable
3130 CAND in LOOP. The computation is unshared. */
3132 static tree
3133 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3135 return get_computation_at (loop, use, cand, use->stmt);
3138 /* Adjust the cost COST for being in loop setup rather than loop body.
3139 If we're optimizing for space, the loop setup overhead is constant;
3140 if we're optimizing for speed, amortize it over the per-iteration cost. */
3141 static unsigned
3142 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3144 if (cost == INFTY)
3145 return cost;
3146 else if (optimize_loop_for_speed_p (data->current_loop))
3147 return cost / avg_loop_niter (data->current_loop);
3148 else
3149 return cost;
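/* For instance, a setup cost of 8 in a loop with an average of 4
   iterations is accounted as 8 / 4 == 2 when optimizing for speed, but
   stays 8 when optimizing for size, since the setup code is executed
   only once regardless of the iteration count.  */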
3152 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3153 validity for a memory reference accessing memory of mode MODE in
3154 address space AS. */
3157 bool
3158 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3159 addr_space_t as)
3161 #define MAX_RATIO 128
3162 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3163 static vec<sbitmap> valid_mult_list;
3164 sbitmap valid_mult;
3166 if (data_index >= valid_mult_list.length ())
3167 valid_mult_list.safe_grow_cleared (data_index + 1);
3169 valid_mult = valid_mult_list[data_index];
3170 if (!valid_mult)
3172 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3173 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3174 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3175 rtx addr, scaled;
3176 HOST_WIDE_INT i;
3178 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3179 bitmap_clear (valid_mult);
3180 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3181 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3182 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3184 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3185 if (memory_address_addr_space_p (mode, addr, as)
3186 || memory_address_addr_space_p (mode, scaled, as))
3187 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3190 if (dump_file && (dump_flags & TDF_DETAILS))
3192 fprintf (dump_file, " allowed multipliers:");
3193 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3194 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3195 fprintf (dump_file, " %d", (int) i);
3196 fprintf (dump_file, "\n");
3197 fprintf (dump_file, "\n");
3200 valid_mult_list[data_index] = valid_mult;
3203 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3204 return false;
3206 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
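/* As an example, on a target with x86-style "base + index * scale"
   addressing one would expect the probing above to accept the ratios
   1, 2, 4 and 8 and to reject values such as 3 or 5, so only the former
   remain usable for the RATIO * index part of an address.  */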
3209 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3210 If SYMBOL_PRESENT is false, the symbol is omitted. If VAR_PRESENT is false,
3211 the variable is omitted. Compute the cost for a memory reference that accesses
3212 a memory location of mode MEM_MODE in address space AS.
3214 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3215 size of MEM_MODE / RATIO) is available. To make this determination, we
3216 look at the size of the increment to be made, which is given in CSTEP.
3217 CSTEP may be zero if the step is unknown.
3218 STMT_AFTER_INC is true iff the statement we're looking at is after the
3219 increment of the original biv.
3221 TODO -- there must be some better way. This all is quite crude. */
3223 enum ainc_type
3225 AINC_PRE_INC, /* Pre increment. */
3226 AINC_PRE_DEC, /* Pre decrement. */
3227 AINC_POST_INC, /* Post increment. */
3228 AINC_POST_DEC, /* Post decrement. */
3229 AINC_NONE /* Also the number of auto increment types. */
3232 typedef struct address_cost_data_s
3234 HOST_WIDE_INT min_offset, max_offset;
3235 unsigned costs[2][2][2][2];
3236 unsigned ainc_costs[AINC_NONE];
3237 } *address_cost_data;
3240 static comp_cost
3241 get_address_cost (bool symbol_present, bool var_present,
3242 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3243 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3244 addr_space_t as, bool speed,
3245 bool stmt_after_inc, bool *may_autoinc)
3247 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3248 static vec<address_cost_data> address_cost_data_list;
3249 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3250 address_cost_data data;
3251 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3252 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3253 unsigned cost, acost, complexity;
3254 enum ainc_type autoinc_type;
3255 bool offset_p, ratio_p, autoinc;
3256 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3257 unsigned HOST_WIDE_INT mask;
3258 unsigned bits;
3260 if (data_index >= address_cost_data_list.length ())
3261 address_cost_data_list.safe_grow_cleared (data_index + 1);
3263 data = address_cost_data_list[data_index];
3264 if (!data)
3266 HOST_WIDE_INT i;
3267 HOST_WIDE_INT rat, off = 0;
3268 int old_cse_not_expected, width;
3269 unsigned sym_p, var_p, off_p, rat_p, add_c;
3270 rtx_insn *seq;
3271 rtx addr, base;
3272 rtx reg0, reg1;
3274 data = (address_cost_data) xcalloc (1, sizeof (*data));
3276 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3278 width = GET_MODE_BITSIZE (address_mode) - 1;
3279 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3280 width = HOST_BITS_PER_WIDE_INT - 1;
3281 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3283 for (i = width; i >= 0; i--)
3285 off = -((unsigned HOST_WIDE_INT) 1 << i);
3286 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3287 if (memory_address_addr_space_p (mem_mode, addr, as))
3288 break;
3290 data->min_offset = (i == -1? 0 : off);
3292 for (i = width; i >= 0; i--)
3294 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3295 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3296 if (memory_address_addr_space_p (mem_mode, addr, as))
3297 break;
3298 /* For some targets, like ARM THUMB1, the offset should be naturally
3299 aligned. Try an aligned offset if address_mode is not QImode. */
3300 off = (address_mode == QImode)
3302 : ((unsigned HOST_WIDE_INT) 1 << i)
3303 - GET_MODE_SIZE (address_mode);
3304 if (off > 0)
3306 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3307 if (memory_address_addr_space_p (mem_mode, addr, as))
3308 break;
3311 if (i == -1)
3312 off = 0;
3313 data->max_offset = off;
3315 if (dump_file && (dump_flags & TDF_DETAILS))
3317 fprintf (dump_file, "get_address_cost:\n");
3318 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3319 GET_MODE_NAME (mem_mode),
3320 data->min_offset);
3321 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3322 GET_MODE_NAME (mem_mode),
3323 data->max_offset);
3326 rat = 1;
3327 for (i = 2; i <= MAX_RATIO; i++)
3328 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3330 rat = i;
3331 break;
3334 /* Compute the cost of various addressing modes. */
3335 acost = 0;
3336 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3337 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3339 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3340 || USE_STORE_PRE_DECREMENT (mem_mode))
3342 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3343 has_predec[mem_mode]
3344 = memory_address_addr_space_p (mem_mode, addr, as);
3346 if (has_predec[mem_mode])
3347 data->ainc_costs[AINC_PRE_DEC]
3348 = address_cost (addr, mem_mode, as, speed);
3350 if (USE_LOAD_POST_DECREMENT (mem_mode)
3351 || USE_STORE_POST_DECREMENT (mem_mode))
3353 addr = gen_rtx_POST_DEC (address_mode, reg0);
3354 has_postdec[mem_mode]
3355 = memory_address_addr_space_p (mem_mode, addr, as);
3357 if (has_postdec[mem_mode])
3358 data->ainc_costs[AINC_POST_DEC]
3359 = address_cost (addr, mem_mode, as, speed);
3361 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3362 || USE_STORE_PRE_INCREMENT (mem_mode))
3364 addr = gen_rtx_PRE_INC (address_mode, reg0);
3365 has_preinc[mem_mode]
3366 = memory_address_addr_space_p (mem_mode, addr, as);
3368 if (has_preinc[mem_mode])
3369 data->ainc_costs[AINC_PRE_INC]
3370 = address_cost (addr, mem_mode, as, speed);
3372 if (USE_LOAD_POST_INCREMENT (mem_mode)
3373 || USE_STORE_POST_INCREMENT (mem_mode))
3375 addr = gen_rtx_POST_INC (address_mode, reg0);
3376 has_postinc[mem_mode]
3377 = memory_address_addr_space_p (mem_mode, addr, as);
3379 if (has_postinc[mem_mode])
3380 data->ainc_costs[AINC_POST_INC]
3381 = address_cost (addr, mem_mode, as, speed);
3383 for (i = 0; i < 16; i++)
3385 sym_p = i & 1;
3386 var_p = (i >> 1) & 1;
3387 off_p = (i >> 2) & 1;
3388 rat_p = (i >> 3) & 1;
3390 addr = reg0;
3391 if (rat_p)
3392 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3393 gen_int_mode (rat, address_mode));
3395 if (var_p)
3396 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3398 if (sym_p)
3400 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3401 /* ??? We can run into trouble with some backends by presenting
3402 it with symbols which haven't been properly passed through
3403 targetm.encode_section_info. By setting the local bit, we
3404 enhance the probability of things working. */
3405 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3407 if (off_p)
3408 base = gen_rtx_fmt_e (CONST, address_mode,
3409 gen_rtx_fmt_ee
3410 (PLUS, address_mode, base,
3411 gen_int_mode (off, address_mode)));
3413 else if (off_p)
3414 base = gen_int_mode (off, address_mode);
3415 else
3416 base = NULL_RTX;
3418 if (base)
3419 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3421 start_sequence ();
3422 /* To avoid splitting addressing modes, pretend that no cse will
3423 follow. */
3424 old_cse_not_expected = cse_not_expected;
3425 cse_not_expected = true;
3426 addr = memory_address_addr_space (mem_mode, addr, as);
3427 cse_not_expected = old_cse_not_expected;
3428 seq = get_insns ();
3429 end_sequence ();
3431 acost = seq_cost (seq, speed);
3432 acost += address_cost (addr, mem_mode, as, speed);
3434 if (!acost)
3435 acost = 1;
3436 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3439 /* On some targets, it is quite expensive to load a symbol into a register,
3440 which makes addresses that contain symbols look much more expensive.
3441 However, the symbol will have to be loaded in any case before the
3442 loop (and quite likely we have it in a register already), so it does not
3443 make much sense to penalize them too heavily. So make some final
3444 tweaks for the SYMBOL_PRESENT modes:
3446 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3447 var is cheaper, use this mode with small penalty.
3448 If VAR_PRESENT is true, try whether the mode with
3449 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3450 if this is the case, use it. */
3451 add_c = add_cost (speed, address_mode);
3452 for (i = 0; i < 8; i++)
3454 var_p = i & 1;
3455 off_p = (i >> 1) & 1;
3456 rat_p = (i >> 2) & 1;
3458 acost = data->costs[0][1][off_p][rat_p] + 1;
3459 if (var_p)
3460 acost += add_c;
3462 if (acost < data->costs[1][var_p][off_p][rat_p])
3463 data->costs[1][var_p][off_p][rat_p] = acost;
3466 if (dump_file && (dump_flags & TDF_DETAILS))
3468 fprintf (dump_file, "Address costs:\n");
3470 for (i = 0; i < 16; i++)
3472 sym_p = i & 1;
3473 var_p = (i >> 1) & 1;
3474 off_p = (i >> 2) & 1;
3475 rat_p = (i >> 3) & 1;
3477 fprintf (dump_file, " ");
3478 if (sym_p)
3479 fprintf (dump_file, "sym + ");
3480 if (var_p)
3481 fprintf (dump_file, "var + ");
3482 if (off_p)
3483 fprintf (dump_file, "cst + ");
3484 if (rat_p)
3485 fprintf (dump_file, "rat * ");
3487 acost = data->costs[sym_p][var_p][off_p][rat_p];
3488 fprintf (dump_file, "index costs %d\n", acost);
3490 if (has_predec[mem_mode] || has_postdec[mem_mode]
3491 || has_preinc[mem_mode] || has_postinc[mem_mode])
3492 fprintf (dump_file, " May include autoinc/dec\n");
3493 fprintf (dump_file, "\n");
3496 address_cost_data_list[data_index] = data;
3499 bits = GET_MODE_BITSIZE (address_mode);
3500 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3501 offset &= mask;
3502 if ((offset >> (bits - 1) & 1))
3503 offset |= ~mask;
3504 s_offset = offset;
3506 autoinc = false;
3507 autoinc_type = AINC_NONE;
3508 msize = GET_MODE_SIZE (mem_mode);
3509 autoinc_offset = offset;
3510 if (stmt_after_inc)
3511 autoinc_offset += ratio * cstep;
3512 if (symbol_present || var_present || ratio != 1)
3513 autoinc = false;
3514 else
3516 if (has_postinc[mem_mode] && autoinc_offset == 0
3517 && msize == cstep)
3518 autoinc_type = AINC_POST_INC;
3519 else if (has_postdec[mem_mode] && autoinc_offset == 0
3520 && msize == -cstep)
3521 autoinc_type = AINC_POST_DEC;
3522 else if (has_preinc[mem_mode] && autoinc_offset == msize
3523 && msize == cstep)
3524 autoinc_type = AINC_PRE_INC;
3525 else if (has_predec[mem_mode] && autoinc_offset == -msize
3526 && msize == -cstep)
3527 autoinc_type = AINC_PRE_DEC;
3529 if (autoinc_type != AINC_NONE)
3530 autoinc = true;
3533 cost = 0;
3534 offset_p = (s_offset != 0
3535 && data->min_offset <= s_offset
3536 && s_offset <= data->max_offset);
3537 ratio_p = (ratio != 1
3538 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3540 if (ratio != 1 && !ratio_p)
3541 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3543 if (s_offset && !offset_p && !symbol_present)
3544 cost += add_cost (speed, address_mode);
3546 if (may_autoinc)
3547 *may_autoinc = autoinc;
3548 if (autoinc)
3549 acost = data->ainc_costs[autoinc_type];
3550 else
3551 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3552 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3553 return new_cost (cost + acost, complexity);
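/* To illustrate the encoding used in the 16-iteration probing loop in
   this function: i == 5 (binary 0101) stands for sym_p = 1, var_p = 0,
   off_p = 1, rat_p = 0, i.e. an address of the form
   "reg + symbol + constant", whose measured cost is stored in
   data->costs[1][0][1][0].  */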
3556 /* Calculates the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3557 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3558 calculating the operands of EXPR. Returns true if successful, and returns
3559 the cost in COST. */
3561 static bool
3562 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3563 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3565 comp_cost res;
3566 tree op1 = TREE_OPERAND (expr, 1);
3567 tree cst = TREE_OPERAND (mult, 1);
3568 tree multop = TREE_OPERAND (mult, 0);
3569 int m = exact_log2 (int_cst_value (cst));
3570 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3571 int sa_cost;
3572 bool equal_p = false;
3574 if (!(m >= 0 && m < maxm))
3575 return false;
3577 if (operand_equal_p (op1, mult, 0))
3578 equal_p = true;
3580 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3581 ? shiftadd_cost (speed, mode, m)
3582 : (equal_p
3583 ? shiftsub1_cost (speed, mode, m)
3584 : shiftsub0_cost (speed, mode, m)));
3585 res = new_cost (sa_cost, 0);
3586 res = add_costs (res, equal_p ? cost0 : cost1);
3588 STRIP_NOPS (multop);
3589 if (!is_gimple_val (multop))
3590 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3592 *cost = res;
3593 return true;
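/* For example, for the expression a + b * 8 the multiplication can be
   carried out as a left shift by m == exact_log2 (8) == 3, so the whole
   expression is costed as a single shift-add (shiftadd_cost) plus the
   cost of the remaining operand, rather than as a full multiply.  */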
3596 /* Estimates cost of forcing expression EXPR into a variable. */
3598 static comp_cost
3599 force_expr_to_var_cost (tree expr, bool speed)
3601 static bool costs_initialized = false;
3602 static unsigned integer_cost [2];
3603 static unsigned symbol_cost [2];
3604 static unsigned address_cost [2];
3605 tree op0, op1;
3606 comp_cost cost0, cost1, cost;
3607 enum machine_mode mode;
3609 if (!costs_initialized)
3611 tree type = build_pointer_type (integer_type_node);
3612 tree var, addr;
3613 rtx x;
3614 int i;
3616 var = create_tmp_var_raw (integer_type_node, "test_var");
3617 TREE_STATIC (var) = 1;
3618 x = produce_memory_decl_rtl (var, NULL);
3619 SET_DECL_RTL (var, x);
3621 addr = build1 (ADDR_EXPR, type, var);
3624 for (i = 0; i < 2; i++)
3626 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3627 2000), i);
3629 symbol_cost[i] = computation_cost (addr, i) + 1;
3631 address_cost[i]
3632 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3633 if (dump_file && (dump_flags & TDF_DETAILS))
3635 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3636 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3637 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3638 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3639 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3640 fprintf (dump_file, "\n");
3644 costs_initialized = true;
3647 STRIP_NOPS (expr);
3649 if (SSA_VAR_P (expr))
3650 return no_cost;
3652 if (is_gimple_min_invariant (expr))
3654 if (TREE_CODE (expr) == INTEGER_CST)
3655 return new_cost (integer_cost [speed], 0);
3657 if (TREE_CODE (expr) == ADDR_EXPR)
3659 tree obj = TREE_OPERAND (expr, 0);
3661 if (TREE_CODE (obj) == VAR_DECL
3662 || TREE_CODE (obj) == PARM_DECL
3663 || TREE_CODE (obj) == RESULT_DECL)
3664 return new_cost (symbol_cost [speed], 0);
3667 return new_cost (address_cost [speed], 0);
3670 switch (TREE_CODE (expr))
3672 case POINTER_PLUS_EXPR:
3673 case PLUS_EXPR:
3674 case MINUS_EXPR:
3675 case MULT_EXPR:
3676 op0 = TREE_OPERAND (expr, 0);
3677 op1 = TREE_OPERAND (expr, 1);
3678 STRIP_NOPS (op0);
3679 STRIP_NOPS (op1);
3680 break;
3682 CASE_CONVERT:
3683 case NEGATE_EXPR:
3684 op0 = TREE_OPERAND (expr, 0);
3685 STRIP_NOPS (op0);
3686 op1 = NULL_TREE;
3687 break;
3689 default:
3690 /* Just an arbitrary value, FIXME. */
3691 return new_cost (target_spill_cost[speed], 0);
3694 if (op0 == NULL_TREE
3695 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3696 cost0 = no_cost;
3697 else
3698 cost0 = force_expr_to_var_cost (op0, speed);
3700 if (op1 == NULL_TREE
3701 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3702 cost1 = no_cost;
3703 else
3704 cost1 = force_expr_to_var_cost (op1, speed);
3706 mode = TYPE_MODE (TREE_TYPE (expr));
3707 switch (TREE_CODE (expr))
3709 case POINTER_PLUS_EXPR:
3710 case PLUS_EXPR:
3711 case MINUS_EXPR:
3712 case NEGATE_EXPR:
3713 cost = new_cost (add_cost (speed, mode), 0);
3714 if (TREE_CODE (expr) != NEGATE_EXPR)
3716 tree mult = NULL_TREE;
3717 comp_cost sa_cost;
3718 if (TREE_CODE (op1) == MULT_EXPR)
3719 mult = op1;
3720 else if (TREE_CODE (op0) == MULT_EXPR)
3721 mult = op0;
3723 if (mult != NULL_TREE
3724 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3725 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3726 speed, &sa_cost))
3727 return sa_cost;
3729 break;
3731 CASE_CONVERT:
3733 tree inner_mode, outer_mode;
3734 outer_mode = TREE_TYPE (expr);
3735 inner_mode = TREE_TYPE (op0);
3736 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3737 TYPE_MODE (inner_mode), speed), 0);
3739 break;
3741 case MULT_EXPR:
3742 if (cst_and_fits_in_hwi (op0))
3743 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3744 mode, speed), 0);
3745 else if (cst_and_fits_in_hwi (op1))
3746 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3747 mode, speed), 0);
3748 else
3749 return new_cost (target_spill_cost [speed], 0);
3750 break;
3752 default:
3753 gcc_unreachable ();
3756 cost = add_costs (cost, cost0);
3757 cost = add_costs (cost, cost1);
3759 /* Bound the cost by target_spill_cost. The parts of complicated
3760 computations are often either loop invariant or at least can
3761 be shared between several iv uses, so letting this grow without
3762 limits would not give reasonable results. */
3763 if (cost.cost > (int) target_spill_cost [speed])
3764 cost.cost = target_spill_cost [speed];
3766 return cost;
3769 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3770 invariants the computation depends on. */
3772 static comp_cost
3773 force_var_cost (struct ivopts_data *data,
3774 tree expr, bitmap *depends_on)
3776 if (depends_on)
3778 fd_ivopts_data = data;
3779 walk_tree (&expr, find_depends, depends_on, NULL);
3782 return force_expr_to_var_cost (expr, data->speed);
3785 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3786 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3787 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3788 invariants the computation depends on. */
3790 static comp_cost
3791 split_address_cost (struct ivopts_data *data,
3792 tree addr, bool *symbol_present, bool *var_present,
3793 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3795 tree core;
3796 HOST_WIDE_INT bitsize;
3797 HOST_WIDE_INT bitpos;
3798 tree toffset;
3799 enum machine_mode mode;
3800 int unsignedp, volatilep;
3802 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3803 &unsignedp, &volatilep, false);
3805 if (toffset != 0
3806 || bitpos % BITS_PER_UNIT != 0
3807 || TREE_CODE (core) != VAR_DECL)
3809 *symbol_present = false;
3810 *var_present = true;
3811 fd_ivopts_data = data;
3812 walk_tree (&addr, find_depends, depends_on, NULL);
3813 return new_cost (target_spill_cost[data->speed], 0);
3816 *offset += bitpos / BITS_PER_UNIT;
3817 if (TREE_STATIC (core)
3818 || DECL_EXTERNAL (core))
3820 *symbol_present = true;
3821 *var_present = false;
3822 return no_cost;
3825 *symbol_present = false;
3826 *var_present = true;
3827 return no_cost;
3830 /* Estimates cost of expressing difference of addresses E1 - E2 as
3831 var + symbol + offset. The value of offset is added to OFFSET,
3832 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3833 part is missing. DEPENDS_ON is a set of the invariants the computation
3834 depends on. */
3836 static comp_cost
3837 ptr_difference_cost (struct ivopts_data *data,
3838 tree e1, tree e2, bool *symbol_present, bool *var_present,
3839 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3841 HOST_WIDE_INT diff = 0;
3842 aff_tree aff_e1, aff_e2;
3843 tree type;
3845 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3847 if (ptr_difference_const (e1, e2, &diff))
3849 *offset += diff;
3850 *symbol_present = false;
3851 *var_present = false;
3852 return no_cost;
3855 if (integer_zerop (e2))
3856 return split_address_cost (data, TREE_OPERAND (e1, 0),
3857 symbol_present, var_present, offset, depends_on);
3859 *symbol_present = false;
3860 *var_present = true;
3862 type = signed_type_for (TREE_TYPE (e1));
3863 tree_to_aff_combination (e1, type, &aff_e1);
3864 tree_to_aff_combination (e2, type, &aff_e2);
3865 aff_combination_scale (&aff_e2, -1);
3866 aff_combination_add (&aff_e1, &aff_e2);
3868 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3871 /* Estimates cost of expressing difference E1 - E2 as
3872 var + symbol + offset. The value of offset is added to OFFSET,
3873 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3874 part is missing. DEPENDS_ON is a set of the invariants the computation
3875 depends on. */
3877 static comp_cost
3878 difference_cost (struct ivopts_data *data,
3879 tree e1, tree e2, bool *symbol_present, bool *var_present,
3880 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3882 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3883 unsigned HOST_WIDE_INT off1, off2;
3884 aff_tree aff_e1, aff_e2;
3885 tree type;
3887 e1 = strip_offset (e1, &off1);
3888 e2 = strip_offset (e2, &off2);
3889 *offset += off1 - off2;
3891 STRIP_NOPS (e1);
3892 STRIP_NOPS (e2);
3894 if (TREE_CODE (e1) == ADDR_EXPR)
3895 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3896 offset, depends_on);
3897 *symbol_present = false;
3899 if (operand_equal_p (e1, e2, 0))
3901 *var_present = false;
3902 return no_cost;
3905 *var_present = true;
3907 if (integer_zerop (e2))
3908 return force_var_cost (data, e1, depends_on);
3910 if (integer_zerop (e1))
3912 comp_cost cost = force_var_cost (data, e2, depends_on);
3913 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3914 return cost;
3917 type = signed_type_for (TREE_TYPE (e1));
3918 tree_to_aff_combination (e1, type, &aff_e1);
3919 tree_to_aff_combination (e2, type, &aff_e2);
3920 aff_combination_scale (&aff_e2, -1);
3921 aff_combination_add (&aff_e1, &aff_e2);
3923 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3926 /* Returns true if AFF1 and AFF2 are identical. */
3928 static bool
3929 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3931 unsigned i;
3933 if (aff1->n != aff2->n)
3934 return false;
3936 for (i = 0; i < aff1->n; i++)
3938 if (aff1->elts[i].coef != aff2->elts[i].coef)
3939 return false;
3941 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3942 return false;
3944 return true;
3947 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3949 static int
3950 get_expr_id (struct ivopts_data *data, tree expr)
3952 struct iv_inv_expr_ent ent;
3953 struct iv_inv_expr_ent **slot;
3955 ent.expr = expr;
3956 ent.hash = iterative_hash_expr (expr, 0);
3957 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3958 if (*slot)
3959 return (*slot)->id;
3961 *slot = XNEW (struct iv_inv_expr_ent);
3962 (*slot)->expr = expr;
3963 (*slot)->hash = ent.hash;
3964 (*slot)->id = data->inv_expr_id++;
3965 return (*slot)->id;
3968 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3969 requires a new compiler-generated temporary. Returns -1 otherwise.
3970 ADDRESS_P is a flag indicating if the expression is for address
3971 computation. */
3973 static int
3974 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3975 tree cbase, HOST_WIDE_INT ratio,
3976 bool address_p)
3978 aff_tree ubase_aff, cbase_aff;
3979 tree expr, ub, cb;
3981 STRIP_NOPS (ubase);
3982 STRIP_NOPS (cbase);
3983 ub = ubase;
3984 cb = cbase;
3986 if ((TREE_CODE (ubase) == INTEGER_CST)
3987 && (TREE_CODE (cbase) == INTEGER_CST))
3988 return -1;
3990 /* Strips the constant part. */
3991 if (TREE_CODE (ubase) == PLUS_EXPR
3992 || TREE_CODE (ubase) == MINUS_EXPR
3993 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3995 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3996 ubase = TREE_OPERAND (ubase, 0);
3999 /* Strips the constant part. */
4000 if (TREE_CODE (cbase) == PLUS_EXPR
4001 || TREE_CODE (cbase) == MINUS_EXPR
4002 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4004 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4005 cbase = TREE_OPERAND (cbase, 0);
4008 if (address_p)
4010 if (((TREE_CODE (ubase) == SSA_NAME)
4011 || (TREE_CODE (ubase) == ADDR_EXPR
4012 && is_gimple_min_invariant (ubase)))
4013 && (TREE_CODE (cbase) == INTEGER_CST))
4014 return -1;
4016 if (((TREE_CODE (cbase) == SSA_NAME)
4017 || (TREE_CODE (cbase) == ADDR_EXPR
4018 && is_gimple_min_invariant (cbase)))
4019 && (TREE_CODE (ubase) == INTEGER_CST))
4020 return -1;
4023 if (ratio == 1)
4025 if (operand_equal_p (ubase, cbase, 0))
4026 return -1;
4028 if (TREE_CODE (ubase) == ADDR_EXPR
4029 && TREE_CODE (cbase) == ADDR_EXPR)
4031 tree usym, csym;
4033 usym = TREE_OPERAND (ubase, 0);
4034 csym = TREE_OPERAND (cbase, 0);
4035 if (TREE_CODE (usym) == ARRAY_REF)
4037 tree ind = TREE_OPERAND (usym, 1);
4038 if (TREE_CODE (ind) == INTEGER_CST
4039 && tree_fits_shwi_p (ind)
4040 && tree_to_shwi (ind) == 0)
4041 usym = TREE_OPERAND (usym, 0);
4043 if (TREE_CODE (csym) == ARRAY_REF)
4045 tree ind = TREE_OPERAND (csym, 1);
4046 if (TREE_CODE (ind) == INTEGER_CST
4047 && tree_fits_shwi_p (ind)
4048 && tree_to_shwi (ind) == 0)
4049 csym = TREE_OPERAND (csym, 0);
4051 if (operand_equal_p (usym, csym, 0))
4052 return -1;
4054 /* Now do a more complex comparison. */
4055 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4056 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4057 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4058 return -1;
4061 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4062 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4064 aff_combination_scale (&cbase_aff, -1 * ratio);
4065 aff_combination_add (&ubase_aff, &cbase_aff);
4066 expr = aff_combination_to_tree (&ubase_aff);
4067 return get_expr_id (data, expr);
4072 /* Determines the cost of the computation by which USE is expressed
4073 from induction variable CAND. If ADDRESS_P is true, we just need
4074 to create an address from it; otherwise we want to get it into a
4075 register. A set of invariants we depend on is stored in
4076 DEPENDS_ON. AT is the statement at which the value is computed.
4077 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4078 addressing is likely. */
4080 static comp_cost
4081 get_computation_cost_at (struct ivopts_data *data,
4082 struct iv_use *use, struct iv_cand *cand,
4083 bool address_p, bitmap *depends_on, gimple at,
4084 bool *can_autoinc,
4085 int *inv_expr_id)
4087 tree ubase = use->iv->base, ustep = use->iv->step;
4088 tree cbase, cstep;
4089 tree utype = TREE_TYPE (ubase), ctype;
4090 unsigned HOST_WIDE_INT cstepi, offset = 0;
4091 HOST_WIDE_INT ratio, aratio;
4092 bool var_present, symbol_present, stmt_is_after_inc;
4093 comp_cost cost;
4094 widest_int rat;
4095 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4096 enum machine_mode mem_mode = (address_p
4097 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4098 : VOIDmode);
4100 *depends_on = NULL;
4102 /* Only consider real candidates. */
4103 if (!cand->iv)
4104 return infinite_cost;
4106 cbase = cand->iv->base;
4107 cstep = cand->iv->step;
4108 ctype = TREE_TYPE (cbase);
4110 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4112 /* We do not have enough precision to express the values of the use. */
4113 return infinite_cost;
4116 if (address_p
4117 || (use->iv->base_object
4118 && cand->iv->base_object
4119 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4120 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4122 /* Do not try to express the address of an object with a computation based
4123 on the address of a different object. This may cause problems in RTL-level
4124 alias analysis (which does not expect this to happen, as it is
4125 illegal in C), and would be unlikely to be useful
4126 anyway. */
4127 if (use->iv->base_object
4128 && cand->iv->base_object
4129 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4130 return infinite_cost;
4133 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4135 /* TODO -- add direct handling of this case. */
4136 goto fallback;
4139 /* CSTEPI is removed from the offset in case the statement is after the
4140 increment. If the step is not constant, we use zero instead.
4141 This is a bit imprecise (there is the extra addition), but
4142 redundancy elimination is likely to transform the code so that
4143 it uses the value of the variable before the increment anyway,
4144 so it is not all that unrealistic. */
4145 if (cst_and_fits_in_hwi (cstep))
4146 cstepi = int_cst_value (cstep);
4147 else
4148 cstepi = 0;
4150 if (!constant_multiple_of (ustep, cstep, &rat))
4151 return infinite_cost;
4153 if (wi::fits_shwi_p (rat))
4154 ratio = rat.to_shwi ();
4155 else
4156 return infinite_cost;
4158 STRIP_NOPS (cbase);
4159 ctype = TREE_TYPE (cbase);
4161 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4163 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4164 or ratio == 1, it is better to handle this like
4166 ubase - ratio * cbase + ratio * var
4168 (this also holds in the case ratio == -1, TODO). */
4170 if (cst_and_fits_in_hwi (cbase))
4172 offset = - ratio * int_cst_value (cbase);
4173 cost = difference_cost (data,
4174 ubase, build_int_cst (utype, 0),
4175 &symbol_present, &var_present, &offset,
4176 depends_on);
4177 cost.cost /= avg_loop_niter (data->current_loop);
4179 else if (ratio == 1)
4181 tree real_cbase = cbase;
4183 /* Check to see if any adjustment is needed. */
4184 if (cstepi == 0 && stmt_is_after_inc)
4186 aff_tree real_cbase_aff;
4187 aff_tree cstep_aff;
4189 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4190 &real_cbase_aff);
4191 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4193 aff_combination_add (&real_cbase_aff, &cstep_aff);
4194 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4197 cost = difference_cost (data,
4198 ubase, real_cbase,
4199 &symbol_present, &var_present, &offset,
4200 depends_on);
4201 cost.cost /= avg_loop_niter (data->current_loop);
4203 else if (address_p
4204 && !POINTER_TYPE_P (ctype)
4205 && multiplier_allowed_in_address_p
4206 (ratio, mem_mode,
4207 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4209 cbase
4210 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4211 cost = difference_cost (data,
4212 ubase, cbase,
4213 &symbol_present, &var_present, &offset,
4214 depends_on);
4215 cost.cost /= avg_loop_niter (data->current_loop);
4217 else
4219 cost = force_var_cost (data, cbase, depends_on);
4220 cost = add_costs (cost,
4221 difference_cost (data,
4222 ubase, build_int_cst (utype, 0),
4223 &symbol_present, &var_present,
4224 &offset, depends_on));
4225 cost.cost /= avg_loop_niter (data->current_loop);
4226 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4229 if (inv_expr_id)
4231 *inv_expr_id =
4232 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4233 /* Clear depends on. */
4234 if (*inv_expr_id != -1 && depends_on && *depends_on)
4235 bitmap_clear (*depends_on);
4238 /* If we are after the increment, the value of the candidate is higher by
4239 one iteration. */
4240 if (stmt_is_after_inc)
4241 offset -= ratio * cstepi;
4243 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4244 (symbol/var1/const parts may be omitted). If we are looking for an
4245 address, find the cost of addressing this. */
4246 if (address_p)
4247 return add_costs (cost,
4248 get_address_cost (symbol_present, var_present,
4249 offset, ratio, cstepi,
4250 mem_mode,
4251 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4252 speed, stmt_is_after_inc,
4253 can_autoinc));
4255 /* Otherwise estimate the costs for computing the expression. */
4256 if (!symbol_present && !var_present && !offset)
4258 if (ratio != 1)
4259 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4260 return cost;
4263 /* Symbol + offset should be compile-time computable so consider that they
4264 are added once to the variable, if present. */
4265 if (var_present && (symbol_present || offset))
4266 cost.cost += adjust_setup_cost (data,
4267 add_cost (speed, TYPE_MODE (ctype)));
4269 /* Having offset does not affect runtime cost in case it is added to
4270 symbol, but it increases complexity. */
4271 if (offset)
4272 cost.complexity++;
4274 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4276 aratio = ratio > 0 ? ratio : -ratio;
4277 if (aratio != 1)
4278 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4279 return cost;
4281 fallback:
4282 if (can_autoinc)
4283 *can_autoinc = false;
4286 /* Just get the expression, expand it and measure the cost. */
4287 tree comp = get_computation_at (data->current_loop, use, cand, at);
4289 if (!comp)
4290 return infinite_cost;
4292 if (address_p)
4293 comp = build_simple_mem_ref (comp);
4295 return new_cost (computation_cost (comp, speed), 0);
4299 /* Determines the cost of the computation by which USE is expressed
4300 from induction variable CAND. If ADDRESS_P is true, we just need
4301 to create an address from it, otherwise we want to get it into a
4302 register. A set of invariants we depend on is stored in
4303 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4304 autoinc addressing is likely. */
4306 static comp_cost
4307 get_computation_cost (struct ivopts_data *data,
4308 struct iv_use *use, struct iv_cand *cand,
4309 bool address_p, bitmap *depends_on,
4310 bool *can_autoinc, int *inv_expr_id)
4312 return get_computation_cost_at (data,
4313 use, cand, address_p, depends_on, use->stmt,
4314 can_autoinc, inv_expr_id);
4317 /* Determines cost of basing replacement of USE on CAND in a generic
4318 expression. */
4320 static bool
4321 determine_use_iv_cost_generic (struct ivopts_data *data,
4322 struct iv_use *use, struct iv_cand *cand)
4324 bitmap depends_on;
4325 comp_cost cost;
4326 int inv_expr_id = -1;
4328 /* The simple case first -- if we need to express value of the preserved
4329 original biv, the cost is 0. This also prevents us from counting the
4330 cost of increment twice -- once at this use and once in the cost of
4331 the candidate. */
4332 if (cand->pos == IP_ORIGINAL
4333 && cand->incremented_at == use->stmt)
4335 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4336 ERROR_MARK, -1);
4337 return true;
4340 cost = get_computation_cost (data, use, cand, false, &depends_on,
4341 NULL, &inv_expr_id);
4343 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4344 inv_expr_id);
4346 return !infinite_cost_p (cost);
4349 /* Determines cost of basing replacement of USE on CAND in an address. */
4351 static bool
4352 determine_use_iv_cost_address (struct ivopts_data *data,
4353 struct iv_use *use, struct iv_cand *cand)
4355 bitmap depends_on;
4356 bool can_autoinc;
4357 int inv_expr_id = -1;
4358 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4359 &can_autoinc, &inv_expr_id);
4361 if (cand->ainc_use == use)
4363 if (can_autoinc)
4364 cost.cost -= cand->cost_step;
4365 /* If we generated the candidate solely for exploiting autoincrement
4366 opportunities, and it turns out it can't be used, set the cost to
4367 infinity to make sure we ignore it. */
4368 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4369 cost = infinite_cost;
4371 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4372 inv_expr_id);
4374 return !infinite_cost_p (cost);
4377 /* Computes value of candidate CAND at position AT in iteration NITER, and
4378 stores it to VAL. */
4380 static void
4381 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4382 aff_tree *val)
4384 aff_tree step, delta, nit;
4385 struct iv *iv = cand->iv;
4386 tree type = TREE_TYPE (iv->base);
4387 tree steptype = type;
4388 if (POINTER_TYPE_P (type))
4389 steptype = sizetype;
4390 steptype = unsigned_type_for (type);
4392 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4393 aff_combination_convert (&step, steptype);
4394 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4395 aff_combination_convert (&nit, steptype);
4396 aff_combination_mult (&nit, &step, &delta);
4397 if (stmt_after_increment (loop, cand, at))
4398 aff_combination_add (&delta, &step);
4400 tree_to_aff_combination (iv->base, type, val);
4401 if (!POINTER_TYPE_P (type))
4402 aff_combination_convert (val, steptype);
4403 aff_combination_add (val, &delta);
4406 /* Returns the period of induction variable IV. */
4408 static tree
4409 iv_period (struct iv *iv)
4411 tree step = iv->step, period, type;
4412 tree pow2div;
4414 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4416 type = unsigned_type_for (TREE_TYPE (step));
4417 /* Period of the iv is lcm (step, type_range)/step - 1,
4418 i.e., N*type_range/step - 1. Since the type range is a power
4419 of two, N == step >> num_of_ending_zeros_binary (step),
4420 so the final result is
4422 (type_range >> num_of_ending_zeros_binary (step)) - 1. */
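/* For example, for a 32-bit unsigned type and step == 12 (two trailing
   zero bits) the period is (2^32 >> 2) - 1 == 0x3fffffff.  */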
4425 pow2div = num_ending_zeros (step);
4427 period = build_low_bits_mask (type,
4428 (TYPE_PRECISION (type)
4429 - tree_to_uhwi (pow2div)));
4431 return period;
4434 /* Returns the comparison operator used when eliminating the iv USE. */
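/* If the exit edge is the true edge of the condition, the loop is left as
   soon as the condition becomes true, so the eliminated test is IV == BOUND;
   otherwise the condition must become false in the exiting iteration, so
   IV != BOUND is used.  */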
4436 static enum tree_code
4437 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4439 struct loop *loop = data->current_loop;
4440 basic_block ex_bb;
4441 edge exit;
4443 ex_bb = gimple_bb (use->stmt);
4444 exit = EDGE_SUCC (ex_bb, 0);
4445 if (flow_bb_inside_loop_p (loop, exit->dest))
4446 exit = EDGE_SUCC (ex_bb, 1);
4448 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4451 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4452 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4453 calculation is performed in non-wrapping type.
4455 TODO: More generally, we could test for the situation that
4456 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4457 This would require knowing the sign of OFFSET. */
4459 static bool
4460 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4462 enum tree_code code;
4463 tree e1, e2;
4464 aff_tree aff_e1, aff_e2, aff_offset;
4466 if (!nowrap_type_p (TREE_TYPE (base)))
4467 return false;
4469 base = expand_simple_operations (base);
4471 if (TREE_CODE (base) == SSA_NAME)
4473 gimple stmt = SSA_NAME_DEF_STMT (base);
4475 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4476 return false;
4478 code = gimple_assign_rhs_code (stmt);
4479 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4480 return false;
4482 e1 = gimple_assign_rhs1 (stmt);
4483 e2 = gimple_assign_rhs2 (stmt);
4485 else
4487 code = TREE_CODE (base);
4488 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4489 return false;
4490 e1 = TREE_OPERAND (base, 0);
4491 e2 = TREE_OPERAND (base, 1);
4494 /* Use affine expansion for a deeper inspection to prove the equality. */
4495 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4496 &aff_e2, &data->name_expansion_cache);
4497 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4498 &aff_offset, &data->name_expansion_cache);
4499 aff_combination_scale (&aff_offset, -1);
4500 switch (code)
4502 case PLUS_EXPR:
4503 aff_combination_add (&aff_e2, &aff_offset);
4504 if (aff_combination_zero_p (&aff_e2))
4505 return true;
4507 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4508 &aff_e1, &data->name_expansion_cache);
4509 aff_combination_add (&aff_e1, &aff_offset);
4510 return aff_combination_zero_p (&aff_e1);
4512 case POINTER_PLUS_EXPR:
4513 aff_combination_add (&aff_e2, &aff_offset);
4514 return aff_combination_zero_p (&aff_e2);
4516 default:
4517 return false;
4521 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4522 comparison with CAND. NITER describes the number of iterations of
4523 the loops. If successful, the comparison in COMP_P is altered accordingly.
4525 We aim to handle the following situation:
4527 sometype *base, *p;
4528 int a, b, i;
4530 i = a;
4531 p = p_0 = base + a;
4533 do
4534 {
4535 bla (*p);
4536 p++;
4537 i++;
4538 }
4539 while (i < b);
4541 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4542 We aim to optimize this to
4544 p = p_0 = base + a;
4545 do
4546 {
4547 bla (*p);
4548 p++;
4549 }
4550 while (p < p_0 - a + b);
4552 This preserves correctness, since the pointer arithmetic does not
4553 overflow. More precisely:
4555 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4556 overflow in computing it or the values of p.
4557 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4558 overflow. To prove this, we use the fact that p_0 = base + a. */
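/* Whether the new exit test uses < or > depends on the direction of the
   candidate: LT_EXPR for a positive step, GT_EXPR for a negative one
   (see the end of the function).  */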
4560 static bool
4561 iv_elimination_compare_lt (struct ivopts_data *data,
4562 struct iv_cand *cand, enum tree_code *comp_p,
4563 struct tree_niter_desc *niter)
4565 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4566 struct aff_tree nit, tmpa, tmpb;
4567 enum tree_code comp;
4568 HOST_WIDE_INT step;
4570 /* We need to know that the candidate induction variable does not overflow.
4571 While more complex analysis may be used to prove this, for now just
4572 check that the variable appears in the original program and that it
4573 is computed in a type that guarantees no overflows. */
4574 cand_type = TREE_TYPE (cand->iv->base);
4575 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4576 return false;
4578 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4579 the calculation of the BOUND could overflow, making the comparison
4580 invalid. */
4581 if (!data->loop_single_exit_p)
4582 return false;
4584 /* We need to be able to decide whether candidate is increasing or decreasing
4585 in order to choose the right comparison operator. */
4586 if (!cst_and_fits_in_hwi (cand->iv->step))
4587 return false;
4588 step = int_cst_value (cand->iv->step);
4590 /* Check that the number of iterations matches the expected pattern:
4591 a + 1 > b ? 0 : b - a - 1. */
4592 mbz = niter->may_be_zero;
4593 if (TREE_CODE (mbz) == GT_EXPR)
4595 /* Handle a + 1 > b. */
4596 tree op0 = TREE_OPERAND (mbz, 0);
4597 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4599 a = TREE_OPERAND (op0, 0);
4600 b = TREE_OPERAND (mbz, 1);
4602 else
4603 return false;
4605 else if (TREE_CODE (mbz) == LT_EXPR)
4607 tree op1 = TREE_OPERAND (mbz, 1);
4609 /* Handle b < a + 1. */
4610 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4612 a = TREE_OPERAND (op1, 0);
4613 b = TREE_OPERAND (mbz, 0);
4615 else
4616 return false;
4618 else
4619 return false;
4621 /* Expected number of iterations is B - A - 1. Check that it matches
4622 the actual number, i.e., that B - A - NITER = 1. */
4623 tree_to_aff_combination (niter->niter, nit_type, &nit);
4624 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4625 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4626 aff_combination_scale (&nit, -1);
4627 aff_combination_scale (&tmpa, -1);
4628 aff_combination_add (&tmpb, &tmpa);
4629 aff_combination_add (&tmpb, &nit);
4630 if (tmpb.n != 0 || tmpb.offset != 1)
4631 return false;
4633 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4634 overflow. */
4635 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4636 cand->iv->step,
4637 fold_convert (TREE_TYPE (cand->iv->step), a));
4638 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4639 return false;
4641 /* Determine the new comparison operator. */
4642 comp = step < 0 ? GT_EXPR : LT_EXPR;
4643 if (*comp_p == NE_EXPR)
4644 *comp_p = comp;
4645 else if (*comp_p == EQ_EXPR)
4646 *comp_p = invert_tree_comparison (comp, false);
4647 else
4648 gcc_unreachable ();
4650 return true;
4653 /* Check whether it is possible to express the condition in USE by comparison
4654 of candidate CAND. If so, store the value compared with to BOUND, and the
4655 comparison operator to COMP. */
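/* For instance, an exit test i < n on an integer iv may be replaced by a
   test of a pointer candidate against the value it has when the loop exits
   (computed by cand_value_at), with == or != chosen according to which edge
   leaves the loop.  */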
4657 static bool
4658 may_eliminate_iv (struct ivopts_data *data,
4659 struct iv_use *use, struct iv_cand *cand, tree *bound,
4660 enum tree_code *comp)
4662 basic_block ex_bb;
4663 edge exit;
4664 tree period;
4665 struct loop *loop = data->current_loop;
4666 aff_tree bnd;
4667 struct tree_niter_desc *desc = NULL;
4669 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4670 return false;
4672 /* For now works only for exits that dominate the loop latch.
4673 TODO: extend to other conditions inside loop body. */
4674 ex_bb = gimple_bb (use->stmt);
4675 if (use->stmt != last_stmt (ex_bb)
4676 || gimple_code (use->stmt) != GIMPLE_COND
4677 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4678 return false;
4680 exit = EDGE_SUCC (ex_bb, 0);
4681 if (flow_bb_inside_loop_p (loop, exit->dest))
4682 exit = EDGE_SUCC (ex_bb, 1);
4683 if (flow_bb_inside_loop_p (loop, exit->dest))
4684 return false;
4686 desc = niter_for_exit (data, exit);
4687 if (!desc)
4688 return false;
4690 /* Determine whether we can use the variable to test the exit condition.
4691 This is the case iff the period of the induction variable is greater
4692 than the number of iterations for which the exit condition is true. */
4693 period = iv_period (cand->iv);
4695 /* If the number of iterations is constant, compare against it directly. */
4696 if (TREE_CODE (desc->niter) == INTEGER_CST)
4698 /* See cand_value_at. */
4699 if (stmt_after_increment (loop, cand, use->stmt))
4701 if (!tree_int_cst_lt (desc->niter, period))
4702 return false;
4704 else
4706 if (tree_int_cst_lt (period, desc->niter))
4707 return false;
4711 /* If not, and if this is the only possible exit of the loop, see whether
4712 we can get a conservative estimate on the number of iterations of the
4713 entire loop and compare against that instead. */
4714 else
4716 widest_int period_value, max_niter;
4718 max_niter = desc->max;
4719 if (stmt_after_increment (loop, cand, use->stmt))
4720 max_niter += 1;
4721 period_value = wi::to_widest (period);
4722 if (wi::gtu_p (max_niter, period_value))
4724 /* See if we can take advantage of inferred loop bound information. */
4725 if (data->loop_single_exit_p)
4727 if (!max_loop_iterations (loop, &max_niter))
4728 return false;
4729 /* The loop bound is already adjusted by adding 1. */
4730 if (wi::gtu_p (max_niter, period_value))
4731 return false;
4733 else
4734 return false;
4738 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4740 *bound = fold_convert (TREE_TYPE (cand->iv->base),
4741 aff_combination_to_tree (&bnd));
4742 *comp = iv_elimination_compare (data, use);
4744 /* It is unlikely that computing the number of iterations using division
4745 would be more profitable than keeping the original induction variable. */
4746 if (expression_expensive_p (*bound))
4747 return false;
4749 /* Sometimes, it is possible to handle the situation that the number of
4750 iterations may be zero unless additional assumptions hold, by using <
4751 instead of != in the exit condition.
4753 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4754 base the exit condition on it. However, that is often too
4755 expensive. */
4756 if (!integer_zerop (desc->may_be_zero))
4757 return iv_elimination_compare_lt (data, cand, comp, desc);
4759 return true;
4762 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4763 be copied, if it is used in the loop body and DATA->body_includes_call. */
4765 static int
4766 parm_decl_cost (struct ivopts_data *data, tree bound)
4768 tree sbound = bound;
4769 STRIP_NOPS (sbound);
4771 if (TREE_CODE (sbound) == SSA_NAME
4772 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4773 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4774 && data->body_includes_call)
4775 return COSTS_N_INSNS (1);
4777 return 0;
4780 /* Determines cost of basing replacement of USE on CAND in a condition. */
4782 static bool
4783 determine_use_iv_cost_condition (struct ivopts_data *data,
4784 struct iv_use *use, struct iv_cand *cand)
4786 tree bound = NULL_TREE;
4787 struct iv *cmp_iv;
4788 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4789 comp_cost elim_cost, express_cost, cost, bound_cost;
4790 bool ok;
4791 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4792 tree *control_var, *bound_cst;
4793 enum tree_code comp = ERROR_MARK;
4795 /* Only consider real candidates. */
4796 if (!cand->iv)
4798 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4799 ERROR_MARK, -1);
4800 return false;
4803 /* Try iv elimination. */
4804 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4806 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4807 if (elim_cost.cost == 0)
4808 elim_cost.cost = parm_decl_cost (data, bound);
4809 else if (TREE_CODE (bound) == INTEGER_CST)
4810 elim_cost.cost = 0;
4811 /* If we replace a loop condition 'i < n' with 'p < base + n',
4812 depends_on_elim will have 'base' and 'n' set, which implies
4813 that both 'base' and 'n' will be live during the loop. More likely,
4814 'base + n' will be loop invariant, resulting in only one live value
4815 during the loop. So in that case we clear depends_on_elim and set
4816 elim_inv_expr_id instead. */
4817 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4819 elim_inv_expr_id = get_expr_id (data, bound);
4820 bitmap_clear (depends_on_elim);
4822 /* The bound is a loop invariant, so it will be only computed
4823 once. */
4824 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4826 else
4827 elim_cost = infinite_cost;
4829 /* Try expressing the original giv. If it is compared with an invariant,
4830 note that we cannot get rid of it. */
4831 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4832 NULL, &cmp_iv);
4833 gcc_assert (ok);
4835 /* When the condition is a comparison of the candidate IV against
4836 zero, prefer this IV.
4838 TODO: The constant that we're subtracting from the cost should
4839 be target-dependent. This information should be added to the
4840 target costs for each backend. */
4841 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4842 && integer_zerop (*bound_cst)
4843 && (operand_equal_p (*control_var, cand->var_after, 0)
4844 || operand_equal_p (*control_var, cand->var_before, 0)))
4845 elim_cost.cost -= 1;
4847 express_cost = get_computation_cost (data, use, cand, false,
4848 &depends_on_express, NULL,
4849 &express_inv_expr_id);
4850 fd_ivopts_data = data;
4851 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4853 /* Count the cost of the original bound as well. */
4854 bound_cost = force_var_cost (data, *bound_cst, NULL);
4855 if (bound_cost.cost == 0)
4856 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4857 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4858 bound_cost.cost = 0;
4859 express_cost.cost += bound_cost.cost;
4861 /* Choose the better approach, preferring the eliminated IV. */
4862 if (compare_costs (elim_cost, express_cost) <= 0)
4864 cost = elim_cost;
4865 depends_on = depends_on_elim;
4866 depends_on_elim = NULL;
4867 inv_expr_id = elim_inv_expr_id;
4869 else
4871 cost = express_cost;
4872 depends_on = depends_on_express;
4873 depends_on_express = NULL;
4874 bound = NULL_TREE;
4875 comp = ERROR_MARK;
4876 inv_expr_id = express_inv_expr_id;
4879 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4881 if (depends_on_elim)
4882 BITMAP_FREE (depends_on_elim);
4883 if (depends_on_express)
4884 BITMAP_FREE (depends_on_express);
4886 return !infinite_cost_p (cost);
4889 /* Determines cost of basing replacement of USE on CAND. Returns false
4890 if USE cannot be based on CAND. */
4892 static bool
4893 determine_use_iv_cost (struct ivopts_data *data,
4894 struct iv_use *use, struct iv_cand *cand)
4896 switch (use->type)
4898 case USE_NONLINEAR_EXPR:
4899 return determine_use_iv_cost_generic (data, use, cand);
4901 case USE_ADDRESS:
4902 return determine_use_iv_cost_address (data, use, cand);
4904 case USE_COMPARE:
4905 return determine_use_iv_cost_condition (data, use, cand);
4907 default:
4908 gcc_unreachable ();
4912 /* Return true if get_computation_cost indicates that autoincrement is
4913 a possibility for the pair of USE and CAND, false otherwise. */
4915 static bool
4916 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4917 struct iv_cand *cand)
4919 bitmap depends_on;
4920 bool can_autoinc;
4921 comp_cost cost;
4923 if (use->type != USE_ADDRESS)
4924 return false;
4926 cost = get_computation_cost (data, use, cand, true, &depends_on,
4927 &can_autoinc, NULL);
4929 BITMAP_FREE (depends_on);
4931 return !infinite_cost_p (cost) && can_autoinc;
4934 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4935 use that allows autoincrement, and set their AINC_USE if possible. */
4937 static void
4938 set_autoinc_for_original_candidates (struct ivopts_data *data)
4940 unsigned i, j;
4942 for (i = 0; i < n_iv_cands (data); i++)
4944 struct iv_cand *cand = iv_cand (data, i);
4945 struct iv_use *closest_before = NULL;
4946 struct iv_use *closest_after = NULL;
4947 if (cand->pos != IP_ORIGINAL)
4948 continue;
4950 for (j = 0; j < n_iv_uses (data); j++)
4952 struct iv_use *use = iv_use (data, j);
4953 unsigned uid = gimple_uid (use->stmt);
4955 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4956 continue;
4958 if (uid < gimple_uid (cand->incremented_at)
4959 && (closest_before == NULL
4960 || uid > gimple_uid (closest_before->stmt)))
4961 closest_before = use;
4963 if (uid > gimple_uid (cand->incremented_at)
4964 && (closest_after == NULL
4965 || uid < gimple_uid (closest_after->stmt)))
4966 closest_after = use;
4969 if (closest_before != NULL
4970 && autoinc_possible_for_pair (data, closest_before, cand))
4971 cand->ainc_use = closest_before;
4972 else if (closest_after != NULL
4973 && autoinc_possible_for_pair (data, closest_after, cand))
4974 cand->ainc_use = closest_after;
4978 /* Finds the candidates for the induction variables. */
4980 static void
4981 find_iv_candidates (struct ivopts_data *data)
4983 /* Add commonly used ivs. */
4984 add_standard_iv_candidates (data);
4986 /* Add old induction variables. */
4987 add_old_ivs_candidates (data);
4989 /* Add induction variables derived from uses. */
4990 add_derived_ivs_candidates (data);
4992 set_autoinc_for_original_candidates (data);
4994 /* Record the important candidates. */
4995 record_important_candidates (data);
4998 /* Determines costs of basing the use of the iv on an iv candidate. */
5000 static void
5001 determine_use_iv_costs (struct ivopts_data *data)
5003 unsigned i, j;
5004 struct iv_use *use;
5005 struct iv_cand *cand;
5006 bitmap to_clear = BITMAP_ALLOC (NULL);
5008 alloc_use_cost_map (data);
5010 for (i = 0; i < n_iv_uses (data); i++)
5012 use = iv_use (data, i);
5014 if (data->consider_all_candidates)
5016 for (j = 0; j < n_iv_cands (data); j++)
5018 cand = iv_cand (data, j);
5019 determine_use_iv_cost (data, use, cand);
5022 else
5024 bitmap_iterator bi;
5026 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5028 cand = iv_cand (data, j);
5029 if (!determine_use_iv_cost (data, use, cand))
5030 bitmap_set_bit (to_clear, j);
5033 /* Remove the candidates for which the cost is infinite from
5034 the list of related candidates. */
5035 bitmap_and_compl_into (use->related_cands, to_clear);
5036 bitmap_clear (to_clear);
5040 BITMAP_FREE (to_clear);
5042 if (dump_file && (dump_flags & TDF_DETAILS))
5044 fprintf (dump_file, "Use-candidate costs:\n");
5046 for (i = 0; i < n_iv_uses (data); i++)
5048 use = iv_use (data, i);
5050 fprintf (dump_file, "Use %d:\n", i);
5051 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5052 for (j = 0; j < use->n_map_members; j++)
5054 if (!use->cost_map[j].cand
5055 || infinite_cost_p (use->cost_map[j].cost))
5056 continue;
5058 fprintf (dump_file, " %d\t%d\t%d\t",
5059 use->cost_map[j].cand->id,
5060 use->cost_map[j].cost.cost,
5061 use->cost_map[j].cost.complexity);
5062 if (use->cost_map[j].depends_on)
5063 bitmap_print (dump_file,
5064 use->cost_map[j].depends_on, "","");
5065 if (use->cost_map[j].inv_expr_id != -1)
5066 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5067 fprintf (dump_file, "\n");
5070 fprintf (dump_file, "\n");
5072 fprintf (dump_file, "\n");
5076 /* Determines cost of the candidate CAND. */
5078 static void
5079 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5081 comp_cost cost_base;
5082 unsigned cost, cost_step;
5083 tree base;
5085 if (!cand->iv)
5087 cand->cost = 0;
5088 return;
5091 /* There are two costs associated with the candidate -- its increment
5092 and its initialization. The second is almost negligible for any loop
5093 that rolls enough, so we take it into account only a little. */
5095 base = cand->iv->base;
5096 cost_base = force_var_cost (data, base, NULL);
5097 /* It will be exceptional that the iv register happens to be initialized with
5098 the proper value at no cost. In general, there will at least be a regcopy
5099 or a const set. */
5100 if (cost_base.cost == 0)
5101 cost_base.cost = COSTS_N_INSNS (1);
5102 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5104 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5106 /* Prefer the original ivs unless we may gain something by replacing them.
5107 The reason is to make debugging simpler; this is therefore not relevant for
5108 artificial ivs created by other optimization passes. */
5109 if (cand->pos != IP_ORIGINAL
5110 || !SSA_NAME_VAR (cand->var_before)
5111 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5112 cost++;
5114 /* Prefer not to insert statements into latch unless there are some
5115 already (so that we do not create unnecessary jumps). */
5116 if (cand->pos == IP_END
5117 && empty_block_p (ip_end_pos (data->current_loop)))
5118 cost++;
5120 cand->cost = cost;
5121 cand->cost_step = cost_step;
5124 /* Determines costs of computation of the candidates. */
5126 static void
5127 determine_iv_costs (struct ivopts_data *data)
5129 unsigned i;
5131 if (dump_file && (dump_flags & TDF_DETAILS))
5133 fprintf (dump_file, "Candidate costs:\n");
5134 fprintf (dump_file, " cand\tcost\n");
5137 for (i = 0; i < n_iv_cands (data); i++)
5139 struct iv_cand *cand = iv_cand (data, i);
5141 determine_iv_cost (data, cand);
5143 if (dump_file && (dump_flags & TDF_DETAILS))
5144 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5147 if (dump_file && (dump_flags & TDF_DETAILS))
5148 fprintf (dump_file, "\n");
5151 /* Calculates cost for having SIZE induction variables. */
5153 static unsigned
5154 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5156 /* We add size to the cost, so that we prefer eliminating ivs
5157 if possible. */
5158 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5159 data->body_includes_call);
5162 /* For each size of the induction variable set determine the penalty. */
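/* DATA->regs_used is estimated as the number of values live across the loop
   header that are not induction variables (non-virtual PHI results without
   an iv) plus the invariants with nonlinear uses; the per-size penalties are
   then dumped for reference when details are requested.  */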
5164 static void
5165 determine_set_costs (struct ivopts_data *data)
5167 unsigned j, n;
5168 gimple phi;
5169 gimple_stmt_iterator psi;
5170 tree op;
5171 struct loop *loop = data->current_loop;
5172 bitmap_iterator bi;
5174 if (dump_file && (dump_flags & TDF_DETAILS))
5176 fprintf (dump_file, "Global costs:\n");
5177 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5178 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5179 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5180 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5183 n = 0;
5184 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5186 phi = gsi_stmt (psi);
5187 op = PHI_RESULT (phi);
5189 if (virtual_operand_p (op))
5190 continue;
5192 if (get_iv (data, op))
5193 continue;
5195 n++;
5198 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5200 struct version_info *info = ver_info (data, j);
5202 if (info->inv_id && info->has_nonlin_use)
5203 n++;
5206 data->regs_used = n;
5207 if (dump_file && (dump_flags & TDF_DETAILS))
5208 fprintf (dump_file, " regs_used %d\n", n);
5210 if (dump_file && (dump_flags & TDF_DETAILS))
5212 fprintf (dump_file, " cost for size:\n");
5213 fprintf (dump_file, " ivs\tcost\n");
5214 for (j = 0; j <= 2 * target_avail_regs; j++)
5215 fprintf (dump_file, " %d\t%d\n", j,
5216 ivopts_global_cost_for_size (data, j));
5217 fprintf (dump_file, "\n");
5221 /* Returns true if A is a cheaper cost pair than B. */
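/* A NULL cost pair is considered more expensive than any valid one, so a
   search for the cheapest pair can simply start from NULL.  */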
5223 static bool
5224 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5226 int cmp;
5228 if (!a)
5229 return false;
5231 if (!b)
5232 return true;
5234 cmp = compare_costs (a->cost, b->cost);
5235 if (cmp < 0)
5236 return true;
5238 if (cmp > 0)
5239 return false;
5241 /* In case the costs are the same, prefer the cheaper candidate. */
5242 if (a->cand->cost < b->cand->cost)
5243 return true;
5245 return false;
5249 /* Returns the cost pair by which USE is expressed in IVS. */
5251 static struct cost_pair *
5252 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5254 return ivs->cand_for_use[use->id];
5257 /* Computes the cost field of IVS structure. */
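/* The total is the sum of the per-use costs, the cost of the selected
   candidates themselves, and a register-pressure penalty for the number of
   iv registers plus the number of distinct invariant expressions in use.  */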
5259 static void
5260 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5262 comp_cost cost = ivs->cand_use_cost;
5264 cost.cost += ivs->cand_cost;
5266 cost.cost += ivopts_global_cost_for_size (data,
5267 ivs->n_regs + ivs->num_used_inv_expr);
5269 ivs->cost = cost;
5272 /* Remove invariants in set INVS from set IVS. */
5274 static void
5275 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5277 bitmap_iterator bi;
5278 unsigned iid;
5280 if (!invs)
5281 return;
5283 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5285 ivs->n_invariant_uses[iid]--;
5286 if (ivs->n_invariant_uses[iid] == 0)
5287 ivs->n_regs--;
5291 /* Set USE not to be expressed by any candidate in IVS. */
5293 static void
5294 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5295 struct iv_use *use)
5297 unsigned uid = use->id, cid;
5298 struct cost_pair *cp;
5300 cp = ivs->cand_for_use[uid];
5301 if (!cp)
5302 return;
5303 cid = cp->cand->id;
5305 ivs->bad_uses++;
5306 ivs->cand_for_use[uid] = NULL;
5307 ivs->n_cand_uses[cid]--;
5309 if (ivs->n_cand_uses[cid] == 0)
5311 bitmap_clear_bit (ivs->cands, cid);
5312 /* Do not count the pseudocandidates. */
5313 if (cp->cand->iv)
5314 ivs->n_regs--;
5315 ivs->n_cands--;
5316 ivs->cand_cost -= cp->cand->cost;
5318 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5321 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5323 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5325 if (cp->inv_expr_id != -1)
5327 ivs->used_inv_expr[cp->inv_expr_id]--;
5328 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5329 ivs->num_used_inv_expr--;
5331 iv_ca_recount_cost (data, ivs);
5334 /* Add invariants in set INVS to set IVS. */
5336 static void
5337 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5339 bitmap_iterator bi;
5340 unsigned iid;
5342 if (!invs)
5343 return;
5345 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5347 ivs->n_invariant_uses[iid]++;
5348 if (ivs->n_invariant_uses[iid] == 1)
5349 ivs->n_regs++;
5353 /* Set cost pair for USE in set IVS to CP. */
5355 static void
5356 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5357 struct iv_use *use, struct cost_pair *cp)
5359 unsigned uid = use->id, cid;
5361 if (ivs->cand_for_use[uid] == cp)
5362 return;
5364 if (ivs->cand_for_use[uid])
5365 iv_ca_set_no_cp (data, ivs, use);
5367 if (cp)
5369 cid = cp->cand->id;
5371 ivs->bad_uses--;
5372 ivs->cand_for_use[uid] = cp;
5373 ivs->n_cand_uses[cid]++;
5374 if (ivs->n_cand_uses[cid] == 1)
5376 bitmap_set_bit (ivs->cands, cid);
5377 /* Do not count the pseudocandidates. */
5378 if (cp->cand->iv)
5379 ivs->n_regs++;
5380 ivs->n_cands++;
5381 ivs->cand_cost += cp->cand->cost;
5383 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5386 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5387 iv_ca_set_add_invariants (ivs, cp->depends_on);
5389 if (cp->inv_expr_id != -1)
5391 ivs->used_inv_expr[cp->inv_expr_id]++;
5392 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5393 ivs->num_used_inv_expr++;
5395 iv_ca_recount_cost (data, ivs);
5399 /* Extend set IVS by expressing USE by some of the candidates in it
5400 if possible. Consider all important candidates if candidates in
5401 set IVS don't give any result. */
5403 static void
5404 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5405 struct iv_use *use)
5407 struct cost_pair *best_cp = NULL, *cp;
5408 bitmap_iterator bi;
5409 unsigned i;
5410 struct iv_cand *cand;
5412 gcc_assert (ivs->upto >= use->id);
5413 ivs->upto++;
5414 ivs->bad_uses++;
5416 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5418 cand = iv_cand (data, i);
5419 cp = get_use_iv_cost (data, use, cand);
5420 if (cheaper_cost_pair (cp, best_cp))
5421 best_cp = cp;
5424 if (best_cp == NULL)
5426 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5428 cand = iv_cand (data, i);
5429 cp = get_use_iv_cost (data, use, cand);
5430 if (cheaper_cost_pair (cp, best_cp))
5431 best_cp = cp;
5435 iv_ca_set_cp (data, ivs, use, best_cp);
5438 /* Get cost for assignment IVS. */
5440 static comp_cost
5441 iv_ca_cost (struct iv_ca *ivs)
5443 /* This was a conditional expression but it triggered a bug in
5444 Sun C 5.5. */
5445 if (ivs->bad_uses)
5446 return infinite_cost;
5447 else
5448 return ivs->cost;
5451 /* Returns true if all dependences of CP are among invariants in IVS. */
5453 static bool
5454 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5456 unsigned i;
5457 bitmap_iterator bi;
5459 if (!cp->depends_on)
5460 return true;
5462 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5464 if (ivs->n_invariant_uses[i] == 0)
5465 return false;
5468 return true;
5471 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5472 it before NEXT_CHANGE. */
5474 static struct iv_ca_delta *
5475 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5476 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5478 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5480 change->use = use;
5481 change->old_cp = old_cp;
5482 change->new_cp = new_cp;
5483 change->next_change = next_change;
5485 return change;
5488 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5489 are rewritten. */
5491 static struct iv_ca_delta *
5492 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5494 struct iv_ca_delta *last;
5496 if (!l2)
5497 return l1;
5499 if (!l1)
5500 return l2;
5502 for (last = l1; last->next_change; last = last->next_change)
5503 continue;
5504 last->next_change = l2;
5506 return l1;
5509 /* Reverse the list of changes DELTA, forming the inverse to it. */
5511 static struct iv_ca_delta *
5512 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5514 struct iv_ca_delta *act, *next, *prev = NULL;
5515 struct cost_pair *tmp;
5517 for (act = delta; act; act = next)
5519 next = act->next_change;
5520 act->next_change = prev;
5521 prev = act;
5523 tmp = act->old_cp;
5524 act->old_cp = act->new_cp;
5525 act->new_cp = tmp;
5528 return prev;
5531 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5532 reverted instead. */
5534 static void
5535 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5536 struct iv_ca_delta *delta, bool forward)
5538 struct cost_pair *from, *to;
5539 struct iv_ca_delta *act;
5541 if (!forward)
5542 delta = iv_ca_delta_reverse (delta);
5544 for (act = delta; act; act = act->next_change)
5546 from = act->old_cp;
5547 to = act->new_cp;
5548 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5549 iv_ca_set_cp (data, ivs, act->use, to);
5552 if (!forward)
5553 iv_ca_delta_reverse (delta);
5556 /* Returns true if CAND is used in IVS. */
5558 static bool
5559 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5561 return ivs->n_cand_uses[cand->id] > 0;
5564 /* Returns number of induction variable candidates in the set IVS. */
5566 static unsigned
5567 iv_ca_n_cands (struct iv_ca *ivs)
5569 return ivs->n_cands;
5572 /* Free the list of changes DELTA. */
5574 static void
5575 iv_ca_delta_free (struct iv_ca_delta **delta)
5577 struct iv_ca_delta *act, *next;
5579 for (act = *delta; act; act = next)
5581 next = act->next_change;
5582 free (act);
5585 *delta = NULL;
5588 /* Allocates a new iv candidate assignment. */
5590 static struct iv_ca *
5591 iv_ca_new (struct ivopts_data *data)
5593 struct iv_ca *nw = XNEW (struct iv_ca);
5595 nw->upto = 0;
5596 nw->bad_uses = 0;
5597 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5598 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5599 nw->cands = BITMAP_ALLOC (NULL);
5600 nw->n_cands = 0;
5601 nw->n_regs = 0;
5602 nw->cand_use_cost = no_cost;
5603 nw->cand_cost = 0;
5604 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5605 nw->cost = no_cost;
5606 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5607 nw->num_used_inv_expr = 0;
5609 return nw;
5612 /* Free memory occupied by the set IVS. */
5614 static void
5615 iv_ca_free (struct iv_ca **ivs)
5617 free ((*ivs)->cand_for_use);
5618 free ((*ivs)->n_cand_uses);
5619 BITMAP_FREE ((*ivs)->cands);
5620 free ((*ivs)->n_invariant_uses);
5621 free ((*ivs)->used_inv_expr);
5622 free (*ivs);
5623 *ivs = NULL;
5626 /* Dumps IVS to FILE. */
5628 static void
5629 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5631 const char *pref = " invariants ";
5632 unsigned i;
5633 comp_cost cost = iv_ca_cost (ivs);
5635 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5636 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5637 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5638 bitmap_print (file, ivs->cands, " candidates: ","\n");
5640 for (i = 0; i < ivs->upto; i++)
5642 struct iv_use *use = iv_use (data, i);
5643 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5644 if (cp)
5645 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5646 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5647 else
5648 fprintf (file, " use:%d --> ??\n", use->id);
5651 for (i = 1; i <= data->max_inv_id; i++)
5652 if (ivs->n_invariant_uses[i])
5654 fprintf (file, "%s%d", pref, i);
5655 pref = ", ";
5657 fprintf (file, "\n\n");
5660 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5661 new set, and store differences in DELTA. Number of induction variables
5662 in the new set is stored in N_IVS. MIN_NCAND is a flag. When it is true
5663 the function will try to find a solution with a minimal number of iv candidates. */
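/* The changes are committed only temporarily, to measure the cost of the
   new set, and are immediately reverted; the caller decides whether to
   commit DELTA for real.  */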
5665 static comp_cost
5666 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5667 struct iv_cand *cand, struct iv_ca_delta **delta,
5668 unsigned *n_ivs, bool min_ncand)
5670 unsigned i;
5671 comp_cost cost;
5672 struct iv_use *use;
5673 struct cost_pair *old_cp, *new_cp;
5675 *delta = NULL;
5676 for (i = 0; i < ivs->upto; i++)
5678 use = iv_use (data, i);
5679 old_cp = iv_ca_cand_for_use (ivs, use);
5681 if (old_cp
5682 && old_cp->cand == cand)
5683 continue;
5685 new_cp = get_use_iv_cost (data, use, cand);
5686 if (!new_cp)
5687 continue;
5689 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5690 continue;
5692 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5693 continue;
5695 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5698 iv_ca_delta_commit (data, ivs, *delta, true);
5699 cost = iv_ca_cost (ivs);
5700 if (n_ivs)
5701 *n_ivs = iv_ca_n_cands (ivs);
5702 iv_ca_delta_commit (data, ivs, *delta, false);
5704 return cost;
5707 /* Try narrowing set IVS by removing CAND. Return the cost of
5708 the new set and store the differences in DELTA. START is
5709 the candidate with which we start narrowing. */
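/* For every use currently expressed by CAND the cheapest replacement among
   the remaining candidates is chosen (starting with START); if some use has
   no replacement at all, the narrowing fails with infinite_cost.  */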
5711 static comp_cost
5712 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5713 struct iv_cand *cand, struct iv_cand *start,
5714 struct iv_ca_delta **delta)
5716 unsigned i, ci;
5717 struct iv_use *use;
5718 struct cost_pair *old_cp, *new_cp, *cp;
5719 bitmap_iterator bi;
5720 struct iv_cand *cnd;
5721 comp_cost cost, best_cost, acost;
5723 *delta = NULL;
5724 for (i = 0; i < n_iv_uses (data); i++)
5726 use = iv_use (data, i);
5728 old_cp = iv_ca_cand_for_use (ivs, use);
5729 if (old_cp->cand != cand)
5730 continue;
5732 best_cost = iv_ca_cost (ivs);
5733 /* Start narrowing with START. */
5734 new_cp = get_use_iv_cost (data, use, start);
5736 if (data->consider_all_candidates)
5738 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5740 if (ci == cand->id || (start && ci == start->id))
5741 continue;
5743 cnd = iv_cand (data, ci);
5745 cp = get_use_iv_cost (data, use, cnd);
5746 if (!cp)
5747 continue;
5749 iv_ca_set_cp (data, ivs, use, cp);
5750 acost = iv_ca_cost (ivs);
5752 if (compare_costs (acost, best_cost) < 0)
5754 best_cost = acost;
5755 new_cp = cp;
5759 else
5761 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5763 if (ci == cand->id || (start && ci == start->id))
5764 continue;
5766 cnd = iv_cand (data, ci);
5768 cp = get_use_iv_cost (data, use, cnd);
5769 if (!cp)
5770 continue;
5772 iv_ca_set_cp (data, ivs, use, cp);
5773 acost = iv_ca_cost (ivs);
5775 if (compare_costs (acost, best_cost) < 0)
5777 best_cost = acost;
5778 new_cp = cp;
5782 /* Restore to old cp for use. */
5783 iv_ca_set_cp (data, ivs, use, old_cp);
5785 if (!new_cp)
5787 iv_ca_delta_free (delta);
5788 return infinite_cost;
5791 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5794 iv_ca_delta_commit (data, ivs, *delta, true);
5795 cost = iv_ca_cost (ivs);
5796 iv_ca_delta_commit (data, ivs, *delta, false);
5798 return cost;
5801 /* Try optimizing the set of candidates IVS by removing candidates other
5802 than EXCEPT_CAND from it. Return cost of the new set, and store
5803 differences in DELTA. */
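/* The function recurses after committing the currently best removal, so a
   single call may remove several candidates; the joined DELTA describes all
   of the removals.  */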
5805 static comp_cost
5806 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5807 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5809 bitmap_iterator bi;
5810 struct iv_ca_delta *act_delta, *best_delta;
5811 unsigned i;
5812 comp_cost best_cost, acost;
5813 struct iv_cand *cand;
5815 best_delta = NULL;
5816 best_cost = iv_ca_cost (ivs);
5818 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5820 cand = iv_cand (data, i);
5822 if (cand == except_cand)
5823 continue;
5825 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5827 if (compare_costs (acost, best_cost) < 0)
5829 best_cost = acost;
5830 iv_ca_delta_free (&best_delta);
5831 best_delta = act_delta;
5833 else
5834 iv_ca_delta_free (&act_delta);
5837 if (!best_delta)
5839 *delta = NULL;
5840 return best_cost;
5843 /* Recurse to possibly remove other unnecessary ivs. */
5844 iv_ca_delta_commit (data, ivs, best_delta, true);
5845 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5846 iv_ca_delta_commit (data, ivs, best_delta, false);
5847 *delta = iv_ca_delta_join (best_delta, *delta);
5848 return best_cost;
5851 /* Tries to extend the set IVS in the best possible way in order
5852 to express the USE. If ORIGINALP is true, prefer candidates from
5853 the original set of IVs, otherwise favor important candidates not
5854 based on any memory object. */
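/* The candidates already in IVS are tried first (via iv_ca_add_use), then
   the important candidates; only if everything so far has infinite cost are
   the use-specific candidates recorded in the cost map considered.  */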
5856 static bool
5857 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5858 struct iv_use *use, bool originalp)
5860 comp_cost best_cost, act_cost;
5861 unsigned i;
5862 bitmap_iterator bi;
5863 struct iv_cand *cand;
5864 struct iv_ca_delta *best_delta = NULL, *act_delta;
5865 struct cost_pair *cp;
5867 iv_ca_add_use (data, ivs, use);
5868 best_cost = iv_ca_cost (ivs);
5869 cp = iv_ca_cand_for_use (ivs, use);
5870 if (cp)
5872 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5873 iv_ca_set_no_cp (data, ivs, use);
5876 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5877 first try important candidates not based on any memory object. Only if
5878 this fails, try the specific ones. Rationale -- in loops with many
5879 variables the best choice often is to use just one generic biv. If we
5880 added here many ivs specific to the uses, the optimization algorithm later
5881 would be likely to get stuck in a local minimum, thus causing us to create
5882 too many ivs. The approach from few ivs to more seems more likely to be
5883 successful -- starting from few ivs, replacing an expensive use by a
5884 specific iv should always be a win. */
5885 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5887 cand = iv_cand (data, i);
5889 if (originalp && cand->pos != IP_ORIGINAL)
5890 continue;
5892 if (!originalp && cand->iv->base_object != NULL_TREE)
5893 continue;
5895 if (iv_ca_cand_used_p (ivs, cand))
5896 continue;
5898 cp = get_use_iv_cost (data, use, cand);
5899 if (!cp)
5900 continue;
5902 iv_ca_set_cp (data, ivs, use, cp);
5903 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5904 true);
5905 iv_ca_set_no_cp (data, ivs, use);
5906 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5908 if (compare_costs (act_cost, best_cost) < 0)
5910 best_cost = act_cost;
5912 iv_ca_delta_free (&best_delta);
5913 best_delta = act_delta;
5915 else
5916 iv_ca_delta_free (&act_delta);
5919 if (infinite_cost_p (best_cost))
5921 for (i = 0; i < use->n_map_members; i++)
5923 cp = use->cost_map + i;
5924 cand = cp->cand;
5925 if (!cand)
5926 continue;
5928 /* Already tried this. */
5929 if (cand->important)
5931 if (originalp && cand->pos == IP_ORIGINAL)
5932 continue;
5933 if (!originalp && cand->iv->base_object == NULL_TREE)
5934 continue;
5937 if (iv_ca_cand_used_p (ivs, cand))
5938 continue;
5940 act_delta = NULL;
5941 iv_ca_set_cp (data, ivs, use, cp);
5942 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5943 iv_ca_set_no_cp (data, ivs, use);
5944 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5945 cp, act_delta);
5947 if (compare_costs (act_cost, best_cost) < 0)
5949 best_cost = act_cost;
5951 if (best_delta)
5952 iv_ca_delta_free (&best_delta);
5953 best_delta = act_delta;
5955 else
5956 iv_ca_delta_free (&act_delta);
5960 iv_ca_delta_commit (data, ivs, best_delta, true);
5961 iv_ca_delta_free (&best_delta);
5963 return !infinite_cost_p (best_cost);
5966 /* Finds an initial assignment of candidates to uses. */
5968 static struct iv_ca *
5969 get_initial_solution (struct ivopts_data *data, bool originalp)
5971 struct iv_ca *ivs = iv_ca_new (data);
5972 unsigned i;
5974 for (i = 0; i < n_iv_uses (data); i++)
5975 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5977 iv_ca_free (&ivs);
5978 return NULL;
5981 return ivs;
5984 /* Tries to improve set of induction variables IVS. */
5986 static bool
5987 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5989 unsigned i, n_ivs;
5990 comp_cost acost, best_cost = iv_ca_cost (ivs);
5991 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5992 struct iv_cand *cand;
5994 /* Try extending the set of induction variables by one. */
5995 for (i = 0; i < n_iv_cands (data); i++)
5997 cand = iv_cand (data, i);
5999 if (iv_ca_cand_used_p (ivs, cand))
6000 continue;
6002 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6003 if (!act_delta)
6004 continue;
6006 /* If we successfully added the candidate and the set is small enough,
6007 try optimizing it by removing other candidates. */
6008 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6010 iv_ca_delta_commit (data, ivs, act_delta, true);
6011 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6012 iv_ca_delta_commit (data, ivs, act_delta, false);
6013 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6016 if (compare_costs (acost, best_cost) < 0)
6018 best_cost = acost;
6019 iv_ca_delta_free (&best_delta);
6020 best_delta = act_delta;
6022 else
6023 iv_ca_delta_free (&act_delta);
6026 if (!best_delta)
6028 /* Try removing the candidates from the set instead. */
6029 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6031 /* Nothing more we can do. */
6032 if (!best_delta)
6033 return false;
6036 iv_ca_delta_commit (data, ivs, best_delta, true);
6037 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6038 iv_ca_delta_free (&best_delta);
6039 return true;
6042 /* Attempts to find the optimal set of induction variables. We use a simple
6043 greedy heuristic -- we try to replace at most one candidate in the selected
6044 solution and remove the unused ivs while this improves the cost. */
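/* That is, an initial assignment is built by get_initial_solution and
   try_improve_iv_set is then applied repeatedly until it can no longer
   decrease the cost.  */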
6046 static struct iv_ca *
6047 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6049 struct iv_ca *set;
6051 /* Get the initial solution. */
6052 set = get_initial_solution (data, originalp);
6053 if (!set)
6055 if (dump_file && (dump_flags & TDF_DETAILS))
6056 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6057 return NULL;
6060 if (dump_file && (dump_flags & TDF_DETAILS))
6062 fprintf (dump_file, "Initial set of candidates:\n");
6063 iv_ca_dump (data, dump_file, set);
6066 while (try_improve_iv_set (data, set))
6068 if (dump_file && (dump_flags & TDF_DETAILS))
6070 fprintf (dump_file, "Improved to:\n");
6071 iv_ca_dump (data, dump_file, set);
6075 return set;
6078 static struct iv_ca *
6079 find_optimal_iv_set (struct ivopts_data *data)
6081 unsigned i;
6082 struct iv_ca *set, *origset;
6083 struct iv_use *use;
6084 comp_cost cost, origcost;
6086 /* Determine the cost based on a strategy that starts with the original IVs,
6087 and try again using a strategy that prefers candidates not based
6088 on any memory object. */
6089 origset = find_optimal_iv_set_1 (data, true);
6090 set = find_optimal_iv_set_1 (data, false);
6092 if (!origset && !set)
6093 return NULL;
6095 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6096 cost = set ? iv_ca_cost (set) : infinite_cost;
6098 if (dump_file && (dump_flags & TDF_DETAILS))
6100 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6101 origcost.cost, origcost.complexity);
6102 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6103 cost.cost, cost.complexity);
6106 /* Choose the one with the best cost. */
6107 if (compare_costs (origcost, cost) <= 0)
6109 if (set)
6110 iv_ca_free (&set);
6111 set = origset;
6113 else if (origset)
6114 iv_ca_free (&origset);
6116 for (i = 0; i < n_iv_uses (data); i++)
6118 use = iv_use (data, i);
6119 use->selected = iv_ca_cand_for_use (set, use)->cand;
6122 return set;
6125 /* Creates a new induction variable corresponding to CAND. */
6127 static void
6128 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6130 gimple_stmt_iterator incr_pos;
6131 tree base;
6132 bool after = false;
6134 if (!cand->iv)
6135 return;
6137 switch (cand->pos)
6139 case IP_NORMAL:
6140 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6141 break;
6143 case IP_END:
6144 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6145 after = true;
6146 break;
6148 case IP_AFTER_USE:
6149 after = true;
6150 /* fall through */
6151 case IP_BEFORE_USE:
6152 incr_pos = gsi_for_stmt (cand->incremented_at);
6153 break;
6155 case IP_ORIGINAL:
6156 /* Mark that the iv is preserved. */
6157 name_info (data, cand->var_before)->preserve_biv = true;
6158 name_info (data, cand->var_after)->preserve_biv = true;
6160 /* Rewrite the increment so that it uses var_before directly. */
6161 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6162 return;
6165 gimple_add_tmp_var (cand->var_before);
6167 base = unshare_expr (cand->iv->base);
6169 create_iv (base, unshare_expr (cand->iv->step),
6170 cand->var_before, data->current_loop,
6171 &incr_pos, after, &cand->var_before, &cand->var_after);
6174 /* Creates new induction variables described in SET. */
6176 static void
6177 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6179 unsigned i;
6180 struct iv_cand *cand;
6181 bitmap_iterator bi;
6183 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6185 cand = iv_cand (data, i);
6186 create_new_iv (data, cand);
6189 if (dump_file && (dump_flags & TDF_DETAILS))
6191 fprintf (dump_file, "\nSelected IV set: \n");
6192 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6194 cand = iv_cand (data, i);
6195 dump_cand (dump_file, cand);
6197 fprintf (dump_file, "\n");
6201 /* Rewrites USE (definition of iv used in a nonlinear expression)
6202 using candidate CAND. */
6204 static void
6205 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6206 struct iv_use *use, struct iv_cand *cand)
6208 tree comp;
6209 tree op, tgt;
6210 gimple ass;
6211 gimple_stmt_iterator bsi;
6213 /* An important special case -- if we are asked to express value of
6214 the original iv by itself, just exit; there is no need to
6215 introduce a new computation (that might also need casting the
6216 variable to unsigned and back). */
6217 if (cand->pos == IP_ORIGINAL
6218 && cand->incremented_at == use->stmt)
6220 enum tree_code stmt_code;
6222 gcc_assert (is_gimple_assign (use->stmt));
6223 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6225 /* Check whether we may leave the computation unchanged.
6226 This is the case only if it does not rely on other
6227 computations in the loop -- otherwise, the computation
6228 we rely upon may be removed in remove_unused_ivs,
6229 thus leading to ICE. */
6230 stmt_code = gimple_assign_rhs_code (use->stmt);
6231 if (stmt_code == PLUS_EXPR
6232 || stmt_code == MINUS_EXPR
6233 || stmt_code == POINTER_PLUS_EXPR)
6235 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6236 op = gimple_assign_rhs2 (use->stmt);
6237 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6238 op = gimple_assign_rhs1 (use->stmt);
6239 else
6240 op = NULL_TREE;
6242 else
6243 op = NULL_TREE;
6245 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6246 return;
6249 comp = get_computation (data->current_loop, use, cand);
6250 gcc_assert (comp != NULL_TREE);
6252 switch (gimple_code (use->stmt))
6254 case GIMPLE_PHI:
6255 tgt = PHI_RESULT (use->stmt);
6257 /* If we should keep the biv, do not replace it. */
6258 if (name_info (data, tgt)->preserve_biv)
6259 return;
6261 bsi = gsi_after_labels (gimple_bb (use->stmt));
6262 break;
6264 case GIMPLE_ASSIGN:
6265 tgt = gimple_assign_lhs (use->stmt);
6266 bsi = gsi_for_stmt (use->stmt);
6267 break;
6269 default:
6270 gcc_unreachable ();
6273 if (!valid_gimple_rhs_p (comp)
6274 || (gimple_code (use->stmt) != GIMPLE_PHI
6275 /* We can't allow re-allocating the stmt, as it might still be
6276 pointed to. */
6277 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6278 >= gimple_num_ops (gsi_stmt (bsi)))))
6280 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6281 true, GSI_SAME_STMT);
6282 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6284 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6285 /* As this isn't a plain copy we have to reset alignment
6286 information. */
6287 if (SSA_NAME_PTR_INFO (comp))
6288 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6292 if (gimple_code (use->stmt) == GIMPLE_PHI)
6294 ass = gimple_build_assign (tgt, comp);
6295 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6297 bsi = gsi_for_stmt (use->stmt);
6298 remove_phi_node (&bsi, false);
6300 else
6302 gimple_assign_set_rhs_from_tree (&bsi, comp);
6303 use->stmt = gsi_stmt (bsi);
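/* Editor's sketch (not part of the original sources): for a GIMPLE_ASSIGN
   use, the statement defining the iv value, e.g.

     i_7 = i_2 + 1;

   has its RHS replaced by the candidate-based computation COMP, giving
   something along the lines of

     i_7 = (int) ivtmp_3;

   For a GIMPLE_PHI use, an equivalent assignment is built after the labels
   of the PHI's block and the PHI node is removed.  The SSA names shown are
   purely illustrative.  */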
6307 /* Performs a peephole optimization to reorder the iv update statement with
6308 a mem ref to enable instruction combining in later phases. The mem ref uses
6309 the iv value before the update, so the reordering transformation requires
6310 adjustment of the offset. CAND is the selected IV_CAND.
6312 Example:
6314 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6315 iv2 = iv1 + 1;
6317 if (t < val) (1)
6318 goto L;
6319 goto Head;
6322 Directly propagating t over to (1) will introduce an overlapping live range,
6323 thus increasing register pressure.  This peephole transforms it into:
6326 iv2 = iv1 + 1;
6327 t = MEM_REF (base, iv2, 8, 8);
6328 if (t < val)
6329 goto L;
6330 goto Head;
*/
6333 static void
6334 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6336 tree var_after;
6337 gimple iv_update, stmt;
6338 basic_block bb;
6339 gimple_stmt_iterator gsi, gsi_iv;
6341 if (cand->pos != IP_NORMAL)
6342 return;
6344 var_after = cand->var_after;
6345 iv_update = SSA_NAME_DEF_STMT (var_after);
6347 bb = gimple_bb (iv_update);
6348 gsi = gsi_last_nondebug_bb (bb);
6349 stmt = gsi_stmt (gsi);
6351 /* Only handle conditional statements for now. */
6352 if (gimple_code (stmt) != GIMPLE_COND)
6353 return;
6355 gsi_prev_nondebug (&gsi);
6356 stmt = gsi_stmt (gsi);
6357 if (stmt != iv_update)
6358 return;
6360 gsi_prev_nondebug (&gsi);
6361 if (gsi_end_p (gsi))
6362 return;
6364 stmt = gsi_stmt (gsi);
6365 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6366 return;
6368 if (stmt != use->stmt)
6369 return;
6371 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6372 return;
6374 if (dump_file && (dump_flags & TDF_DETAILS))
6376 fprintf (dump_file, "Reordering \n");
6377 print_gimple_stmt (dump_file, iv_update, 0, 0);
6378 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6379 fprintf (dump_file, "\n");
6382 gsi = gsi_for_stmt (use->stmt);
6383 gsi_iv = gsi_for_stmt (iv_update);
6384 gsi_move_before (&gsi_iv, &gsi);
6386 cand->pos = IP_BEFORE_USE;
6387 cand->incremented_at = use->stmt;
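/* Editor's note (not part of the original sources): after the reordering the
   candidate is recorded as IP_BEFORE_USE with incremented_at pointing at the
   memory reference, so the computation built for that use in
   rewrite_use_address below presumably sees the already-incremented variable,
   and the offset adjustment described in the comment above falls out of the
   normal use/candidate computation.  */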
6390 /* Rewrites USE (an address that is an iv) using candidate CAND. */
6392 static void
6393 rewrite_use_address (struct ivopts_data *data,
6394 struct iv_use *use, struct iv_cand *cand)
6396 aff_tree aff;
6397 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6398 tree base_hint = NULL_TREE;
6399 tree ref, iv;
6400 bool ok;
6402 adjust_iv_update_pos (cand, use);
6403 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6404 gcc_assert (ok);
6405 unshare_aff_combination (&aff);
6407 /* To avoid undefined overflow problems, all IV candidates use unsigned
6408 integer types. The drawback is that this makes it impossible for
6409 create_mem_ref to distinguish an IV that is based on a memory object
6410 from one that represents simply an offset.
6412 To work around this problem, we pass a hint to create_mem_ref that
6413 indicates which variable (if any) in aff is an IV based on a memory
6414 object. Note that we only consider the candidate. If this is not
6415 based on an object, the base of the reference is in some subexpression
6416 of the use -- but these will use pointer types, so they are recognized
6417 by the create_mem_ref heuristics anyway. */
6418 if (cand->iv->base_object)
6419 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6421 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6422 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6423 reference_alias_ptr_type (*use->op_p),
6424 iv, base_hint, data->speed);
6425 copy_ref_info (ref, *use->op_p);
6426 *use->op_p = ref;
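/* Editor's sketch (not part of the original sources): an address use such as
   a[i_2] is rebuilt by create_mem_ref from the affine combination AFF, using
   the candidate's value IV at USE->stmt as the index/base, so the resulting
   reference may look like

     MEM[base: ivtmp_3]        or        MEM[symbol: a, index: ivtmp_3, ...]

   (a MEM_REF or TARGET_MEM_REF, whichever the target handles cheaply).  The
   forms shown are illustrative only.  */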
6429 /* Rewrites USE (a condition in which one of the arguments is an iv) using
6430 candidate CAND. */
6432 static void
6433 rewrite_use_compare (struct ivopts_data *data,
6434 struct iv_use *use, struct iv_cand *cand)
6436 tree comp, *var_p, op, bound;
6437 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6438 enum tree_code compare;
6439 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6440 bool ok;
6442 bound = cp->value;
6443 if (bound)
6445 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6446 tree var_type = TREE_TYPE (var);
6447 gimple_seq stmts;
6449 if (dump_file && (dump_flags & TDF_DETAILS))
6451 fprintf (dump_file, "Replacing exit test: ");
6452 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6454 compare = cp->comp;
6455 bound = unshare_expr (fold_convert (var_type, bound));
6456 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6457 if (stmts)
6458 gsi_insert_seq_on_edge_immediate (
6459 loop_preheader_edge (data->current_loop),
6460 stmts);
6462 gimple_cond_set_lhs (use->stmt, var);
6463 gimple_cond_set_code (use->stmt, compare);
6464 gimple_cond_set_rhs (use->stmt, op);
6465 return;
6468 /* The induction variable elimination failed; just express the original
6469 giv. */
6470 comp = get_computation (data->current_loop, use, cand);
6471 gcc_assert (comp != NULL_TREE);
6473 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6474 gcc_assert (ok);
6476 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6477 true, GSI_SAME_STMT);
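/* Editor's sketch (not part of the original sources): when a bound was
   recorded in the cost pair, an exit test such as

     if (i_2 < n_5)

   is replaced by a test of the candidate against the bound computed in the
   preheader, e.g.

     if (ivtmp_7 != bound_9)

   Otherwise the compared value is merely re-expressed in terms of the
   candidate.  The SSA names and the particular comparison are illustrative;
   the comparison code actually used is cp->comp.  */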
6480 /* Rewrites USE using candidate CAND. */
6482 static void
6483 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6485 switch (use->type)
6487 case USE_NONLINEAR_EXPR:
6488 rewrite_use_nonlinear_expr (data, use, cand);
6489 break;
6491 case USE_ADDRESS:
6492 rewrite_use_address (data, use, cand);
6493 break;
6495 case USE_COMPARE:
6496 rewrite_use_compare (data, use, cand);
6497 break;
6499 default:
6500 gcc_unreachable ();
6503 update_stmt (use->stmt);
6506 /* Rewrite the uses using the selected induction variables. */
6508 static void
6509 rewrite_uses (struct ivopts_data *data)
6511 unsigned i;
6512 struct iv_cand *cand;
6513 struct iv_use *use;
6515 for (i = 0; i < n_iv_uses (data); i++)
6517 use = iv_use (data, i);
6518 cand = use->selected;
6519 gcc_assert (cand);
6521 rewrite_use (data, use, cand);
6525 /* Removes the ivs that are not used after rewriting. */
6527 static void
6528 remove_unused_ivs (struct ivopts_data *data)
6530 unsigned j;
6531 bitmap_iterator bi;
6532 bitmap toremove = BITMAP_ALLOC (NULL);
6534 /* Figure out an order in which to release SSA DEFs so that we don't
6535 release something that we'd have to propagate into a debug stmt
6536 afterwards. */
6537 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6539 struct version_info *info;
6541 info = ver_info (data, j);
6542 if (info->iv
6543 && !integer_zerop (info->iv->step)
6544 && !info->inv_id
6545 && !info->iv->have_use_for
6546 && !info->preserve_biv)
6548 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6550 tree def = info->iv->ssa_name;
6552 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6554 imm_use_iterator imm_iter;
6555 use_operand_p use_p;
6556 gimple stmt;
6557 int count = 0;
6559 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6561 if (!gimple_debug_bind_p (stmt))
6562 continue;
6564 /* We just want to determine whether to do nothing
6565 (count == 0), to substitute the computed
6566 expression into a single use of the SSA DEF by
6567 itself (count == 1), or to use a debug temp
6568 because the SSA DEF is used multiple times or as
6569 part of a larger expression (count > 1). */
6570 count++;
6571 if (gimple_debug_bind_get_value (stmt) != def)
6572 count++;
6574 if (count > 1)
6575 BREAK_FROM_IMM_USE_STMT (imm_iter);
6578 if (!count)
6579 continue;
6581 struct iv_use dummy_use;
6582 struct iv_cand *best_cand = NULL, *cand;
6583 unsigned i, best_pref = 0, cand_pref;
6585 memset (&dummy_use, 0, sizeof (dummy_use));
6586 dummy_use.iv = info->iv;
6587 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6589 cand = iv_use (data, i)->selected;
6590 if (cand == best_cand)
6591 continue;
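/* Rank the candidate by how well it can stand in for the removed iv in
   debug expressions: a matching step counts most (4), then a matching
   mode of the base (2), then a constant base (1); the highest-scoring
   candidate is kept.  */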
6592 cand_pref = operand_equal_p (cand->iv->step,
6593 info->iv->step, 0)
6594 ? 4 : 0;
6595 cand_pref
6596 += TYPE_MODE (TREE_TYPE (cand->iv->base))
6597 == TYPE_MODE (TREE_TYPE (info->iv->base))
6598 ? 2 : 0;
6599 cand_pref
6600 += TREE_CODE (cand->iv->base) == INTEGER_CST
6601 ? 1 : 0;
6602 if (best_cand == NULL || best_pref < cand_pref)
6604 best_cand = cand;
6605 best_pref = cand_pref;
6609 if (!best_cand)
6610 continue;
6612 tree comp = get_computation_at (data->current_loop,
6613 &dummy_use, best_cand,
6614 SSA_NAME_DEF_STMT (def));
6615 if (!comp)
6616 continue;
6618 if (count > 1)
6620 tree vexpr = make_node (DEBUG_EXPR_DECL);
6621 DECL_ARTIFICIAL (vexpr) = 1;
6622 TREE_TYPE (vexpr) = TREE_TYPE (comp);
6623 if (SSA_NAME_VAR (def))
6624 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6625 else
6626 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6627 gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
6628 gimple_stmt_iterator gsi;
6630 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6631 gsi = gsi_after_labels (gimple_bb
6632 (SSA_NAME_DEF_STMT (def)));
6633 else
6634 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6636 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6637 comp = vexpr;
6640 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6642 if (!gimple_debug_bind_p (stmt))
6643 continue;
6645 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6646 SET_USE (use_p, comp);
6648 update_stmt (stmt);
6654 release_defs_bitset (toremove);
6656 BITMAP_FREE (toremove);
6659 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6660 for hash_map::traverse. */
6662 bool
6663 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
6665 free (value);
6666 return true;
6669 /* Frees data allocated by the optimization of a single loop. */
6671 static void
6672 free_loop_data (struct ivopts_data *data)
6674 unsigned i, j;
6675 bitmap_iterator bi;
6676 tree obj;
6678 if (data->niters)
6680 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
6681 delete data->niters;
6682 data->niters = NULL;
6685 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6687 struct version_info *info;
6689 info = ver_info (data, i);
6690 free (info->iv);
6691 info->iv = NULL;
6692 info->has_nonlin_use = false;
6693 info->preserve_biv = false;
6694 info->inv_id = 0;
6696 bitmap_clear (data->relevant);
6697 bitmap_clear (data->important_candidates);
6699 for (i = 0; i < n_iv_uses (data); i++)
6701 struct iv_use *use = iv_use (data, i);
6703 free (use->iv);
6704 BITMAP_FREE (use->related_cands);
6705 for (j = 0; j < use->n_map_members; j++)
6706 if (use->cost_map[j].depends_on)
6707 BITMAP_FREE (use->cost_map[j].depends_on);
6708 free (use->cost_map);
6709 free (use);
6711 data->iv_uses.truncate (0);
6713 for (i = 0; i < n_iv_cands (data); i++)
6715 struct iv_cand *cand = iv_cand (data, i);
6717 free (cand->iv);
6718 if (cand->depends_on)
6719 BITMAP_FREE (cand->depends_on);
6720 free (cand);
6722 data->iv_candidates.truncate (0);
6724 if (data->version_info_size < num_ssa_names)
6726 data->version_info_size = 2 * num_ssa_names;
6727 free (data->version_info);
6728 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6731 data->max_inv_id = 0;
6733 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6734 SET_DECL_RTL (obj, NULL_RTX);
6736 decl_rtl_to_reset.truncate (0);
6738 data->inv_expr_tab->empty ();
6739 data->inv_expr_id = 0;
6742 /* Finalizes data structures used by the iv optimization pass. */
6745 static void
6746 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6748 free_loop_data (data);
6749 free (data->version_info);
6750 BITMAP_FREE (data->relevant);
6751 BITMAP_FREE (data->important_candidates);
6753 decl_rtl_to_reset.release ();
6754 data->iv_uses.release ();
6755 data->iv_candidates.release ();
6756 delete data->inv_expr_tab;
6757 data->inv_expr_tab = NULL;
6758 free_affine_expand_cache (&data->name_expansion_cache);
6761 /* Returns true if the loop body BODY, consisting of NUM_NODES blocks, includes any function calls. */
6763 static bool
6764 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6766 gimple_stmt_iterator gsi;
6767 unsigned i;
6769 for (i = 0; i < num_nodes; i++)
6770 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6772 gimple stmt = gsi_stmt (gsi);
6773 if (is_gimple_call (stmt)
6774 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6775 return true;
6777 return false;
6780 /* Optimizes the LOOP. Returns true if anything changed. */
6782 static bool
6783 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6785 bool changed = false;
6786 struct iv_ca *iv_ca;
6787 edge exit = single_dom_exit (loop);
6788 basic_block *body;
6790 gcc_assert (!data->niters);
6791 data->current_loop = loop;
6792 data->speed = optimize_loop_for_speed_p (loop);
6794 if (dump_file && (dump_flags & TDF_DETAILS))
6796 fprintf (dump_file, "Processing loop %d\n", loop->num);
6798 if (exit)
6800 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6801 exit->src->index, exit->dest->index);
6802 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6803 fprintf (dump_file, "\n");
6806 fprintf (dump_file, "\n");
6809 body = get_loop_body (loop);
6810 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6811 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6812 free (body);
6814 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6816 /* For each ssa name, determine whether it behaves as an induction variable
6817 in some loop. */
6818 if (!find_induction_variables (data))
6819 goto finish;
6821 /* Finds interesting uses (item 1). */
6822 find_interesting_uses (data);
6823 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6824 goto finish;
6826 /* Finds candidates for the induction variables (item 2). */
6827 find_iv_candidates (data);
6829 /* Calculates the costs (item 3, part 1). */
6830 determine_iv_costs (data);
6831 determine_use_iv_costs (data);
6832 determine_set_costs (data);
6834 /* Find the optimal set of induction variables (item 3, part 2). */
6835 iv_ca = find_optimal_iv_set (data);
6836 if (!iv_ca)
6837 goto finish;
6838 changed = true;
6840 /* Create the new induction variables (item 4, part 1). */
6841 create_new_ivs (data, iv_ca);
6842 iv_ca_free (&iv_ca);
6844 /* Rewrite the uses (item 4, part 2). */
6845 rewrite_uses (data);
6847 /* Remove the ivs that are unused after rewriting. */
6848 remove_unused_ivs (data);
6850 /* We have changed the structure of induction variables; it might happen
6851 that definitions in the scev database refer to some of them that were
6852 eliminated. */
6853 scev_reset ();
6855 finish:
6856 free_loop_data (data);
6858 return changed;
6861 /* Main entry point. Optimizes induction variables in loops. */
6863 void
6864 tree_ssa_iv_optimize (void)
6866 struct loop *loop;
6867 struct ivopts_data data;
6869 tree_ssa_iv_optimize_init (&data);
6871 /* Optimize the loops starting with the innermost ones. */
6872 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
6874 if (dump_file && (dump_flags & TDF_DETAILS))
6875 flow_loop_dump (loop, dump_file, NULL, 1);
6877 tree_ssa_iv_optimize_loop (&data, loop);
6880 tree_ssa_iv_optimize_finalize (&data);