/* Induction variable optimizations.
   Copyright (C) 2003-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and might enable us to decide
   costs more precisely, but getting all the interactions right would be
   complicated.  */
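
/* As a purely illustrative sketch (this loop does not appear in the
   sources): given

     for (i = 0; i < n; i++)
       a[i] = 0;

   step 1 finds the address use &a[i], step 2 adds among others the
   candidate p = &a[0] stepping by sizeof (a[0]), and if step 3 selects
   that candidate, step 4 rewrites the loop into the equivalent of

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   which is the classical strength reduction of the multiplication hidden
   in the address computation &a[i].  */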
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "hash-map.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"
#include "tree-ssa-address.h"
#include "builtins.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we also return
   true if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
         names that appear in phi nodes on abnormal edges, so that we do not
         create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
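
/* As a purely illustrative sketch (this expression does not appear in the
   sources): for an iv base of the form &a[0] + i * 4, the
   POINTER_PLUS_EXPR case recurses into &a[0], whose ADDR_EXPR case yields
   (void *) &a, so all ivs derived from that base are known to point into
   the object `a'.  */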

/* Return true if an address expression with a non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}

/* Allocates an induction variable with given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
          forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi, def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv peeled from another one as a biv.  */
      if (def
          && gimple_code (def) == GIMPLE_PHI
          && gimple_bb (def) == loop->header)
        continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP, determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, so that it is not shown in the dumps, if it was
     not reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
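
/* As a purely illustrative sketch (these names do not appear in the
   sources): for an exit test like

     if (i_4 < n_7)

   where i_4 is an iv with nonzero step and n_7 is loop invariant, the
   function returns true with *CONTROL_VAR pointing at i_4 and *BOUND at
   n_7; for `if (n_7 > i_4)' the operands are swapped first, since the
   control variable may be on either side of the comparison.  */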

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
        {
          if (flow_bb_inside_loop_p (loop, def_bb))
            return NULL;
          return superloop_at_depth (loop,
                                     loop_depth (def_bb->loop_father) + 1);
        }

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
        continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
        return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
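
/* As a purely illustrative sketch (this reference does not appear in the
   sources): for a[i_1][j_2] with `int a[10][20]', for_each_index invokes
   idx_find_step on both indices; if i_1 steps by 1 and j_2 by 2 per
   iteration, the cumulated step is 1 * 80 + 2 * 4 = 88 bytes, and both
   indices in the reference are replaced by their initial values.  */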

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
        return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
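
/* As a purely illustrative sketch (these trees do not appear in the
   sources): with TOP being the tree for n_1 * 4 + n_1 and BOT being n_1,
   the PLUS_EXPR case recurses into both operands, obtains 4 and 1, and
   stores 5 to *MUL; with TOP = 12 and BOT = 8 the INTEGER_CST case fails
   because the truncating division leaves a remainder.  */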

/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}
2066 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2067 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2068 we are at the top-level of the processed address. */
2070 static tree
2071 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2072 HOST_WIDE_INT *offset)
2074 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2075 enum tree_code code;
2076 tree type, orig_type = TREE_TYPE (expr);
2077 HOST_WIDE_INT off0, off1, st;
2078 tree orig_expr = expr;
2080 STRIP_NOPS (expr);
2082 type = TREE_TYPE (expr);
2083 code = TREE_CODE (expr);
2084 *offset = 0;
2086 switch (code)
2088 case INTEGER_CST:
2089 if (!cst_and_fits_in_hwi (expr)
2090 || integer_zerop (expr))
2091 return orig_expr;
2093 *offset = int_cst_value (expr);
2094 return build_int_cst (orig_type, 0);
2096 case POINTER_PLUS_EXPR:
2097 case PLUS_EXPR:
2098 case MINUS_EXPR:
2099 op0 = TREE_OPERAND (expr, 0);
2100 op1 = TREE_OPERAND (expr, 1);
2102 op0 = strip_offset_1 (op0, false, false, &off0);
2103 op1 = strip_offset_1 (op1, false, false, &off1);
2105 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2106 if (op0 == TREE_OPERAND (expr, 0)
2107 && op1 == TREE_OPERAND (expr, 1))
2108 return orig_expr;
2110 if (integer_zerop (op1))
2111 expr = op0;
2112 else if (integer_zerop (op0))
2114 if (code == MINUS_EXPR)
2115 expr = fold_build1 (NEGATE_EXPR, type, op1);
2116 else
2117 expr = op1;
2119 else
2120 expr = fold_build2 (code, type, op0, op1);
2122 return fold_convert (orig_type, expr);
2124 case MULT_EXPR:
2125 op1 = TREE_OPERAND (expr, 1);
2126 if (!cst_and_fits_in_hwi (op1))
2127 return orig_expr;
2129 op0 = TREE_OPERAND (expr, 0);
2130 op0 = strip_offset_1 (op0, false, false, &off0);
2131 if (op0 == TREE_OPERAND (expr, 0))
2132 return orig_expr;
2134 *offset = off0 * int_cst_value (op1);
2135 if (integer_zerop (op0))
2136 expr = op0;
2137 else
2138 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2140 return fold_convert (orig_type, expr);
2142 case ARRAY_REF:
2143 case ARRAY_RANGE_REF:
2144 if (!inside_addr)
2145 return orig_expr;
2147 step = array_ref_element_size (expr);
2148 if (!cst_and_fits_in_hwi (step))
2149 break;
2151 st = int_cst_value (step);
2152 op1 = TREE_OPERAND (expr, 1);
2153 op1 = strip_offset_1 (op1, false, false, &off1);
2154 *offset = off1 * st;
2156 if (top_compref
2157 && integer_zerop (op1))
2159 /* Strip the component reference completely. */
2160 op0 = TREE_OPERAND (expr, 0);
2161 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2162 *offset += off0;
2163 return op0;
2165 break;
2167 case COMPONENT_REF:
2169 tree field;
2171 if (!inside_addr)
2172 return orig_expr;
2174 tmp = component_ref_field_offset (expr);
2175 field = TREE_OPERAND (expr, 1);
2176 if (top_compref
2177 && cst_and_fits_in_hwi (tmp)
2178 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2180 HOST_WIDE_INT boffset, abs_off;
2182 /* Strip the component reference completely. */
2183 op0 = TREE_OPERAND (expr, 0);
2184 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2185 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2186 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2187 if (boffset < 0)
2188 abs_off = -abs_off;
2190 *offset = off0 + int_cst_value (tmp) + abs_off;
2191 return op0;
2194 break;
2196 case ADDR_EXPR:
2197 op0 = TREE_OPERAND (expr, 0);
2198 op0 = strip_offset_1 (op0, true, true, &off0);
2199 *offset += off0;
2201 if (op0 == TREE_OPERAND (expr, 0))
2202 return orig_expr;
2204 expr = build_fold_addr_expr (op0);
2205 return fold_convert (orig_type, expr);
2207 case MEM_REF:
2208 /* ??? Offset operand? */
2209 inside_addr = false;
2210 break;
2212 default:
2213 return orig_expr;
2216 /* Default handling of expressions for which we want to recurse into
2217 the first operand. */
2218 op0 = TREE_OPERAND (expr, 0);
2219 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2220 *offset += off0;
2222 if (op0 == TREE_OPERAND (expr, 0)
2223 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2224 return orig_expr;
2226 expr = copy_node (expr);
2227 TREE_OPERAND (expr, 0) = op0;
2228 if (op1)
2229 TREE_OPERAND (expr, 1) = op1;
2231 /* Inside an address, we might strip the top level component references,
2232 thus changing the type of the expression. The handling of ADDR_EXPR
2233 will fix that. */
2234 expr = fold_convert (orig_type, expr);
2236 return expr;
2239 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2241 static tree
2242 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2244 HOST_WIDE_INT off;
2245 tree core = strip_offset_1 (expr, false, false, &off);
2246 *offset = off;
2247 return core;
2250 /* Returns a variant of TYPE that can be used as a base for different uses.
2251 We return an unsigned type with the same precision, which avoids problems
2252 with overflows. */
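/* E.g. generic_type_for (int) is unsigned int, and for a pointer type
   unsigned_type_for yields the unsigned integer type of the same
   precision (the exact type is target-dependent).  */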
2254 static tree
2255 generic_type_for (tree type)
2257 if (POINTER_TYPE_P (type))
2258 return unsigned_type_for (type);
2260 if (TYPE_UNSIGNED (type))
2261 return type;
2263 return unsigned_type_for (type);
2266 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2267 the bitmap in which we should store them. */
2269 static struct ivopts_data *fd_ivopts_data;
2270 static tree
2271 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2273 bitmap *depends_on = (bitmap *) data;
2274 struct version_info *info;
2276 if (TREE_CODE (*expr_p) != SSA_NAME)
2277 return NULL_TREE;
2278 info = name_info (fd_ivopts_data, *expr_p);
2280 if (!info->inv_id || info->has_nonlin_use)
2281 return NULL_TREE;
2283 if (!*depends_on)
2284 *depends_on = BITMAP_ALLOC (NULL);
2285 bitmap_set_bit (*depends_on, info->inv_id);
2287 return NULL_TREE;
2290 /* Adds a candidate BASE + STEP * i. The 'important' field is set to IMPORTANT and
2291 the position to POS. If USE is not NULL, the candidate is set as related to
2292 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2293 replacement of the final value of the iv by a direct computation. */
2295 static struct iv_cand *
2296 add_candidate_1 (struct ivopts_data *data,
2297 tree base, tree step, bool important, enum iv_position pos,
2298 struct iv_use *use, gimple incremented_at)
2300 unsigned i;
2301 struct iv_cand *cand = NULL;
2302 tree type, orig_type;
2304 /* For non-original variables, make sure their values are computed in a type
2305 that does not invoke undefined behavior on overflows (since in general,
2306 we cannot prove that these induction variables are non-wrapping). */
2307 if (pos != IP_ORIGINAL)
2309 orig_type = TREE_TYPE (base);
2310 type = generic_type_for (orig_type);
2311 if (type != orig_type)
2313 base = fold_convert (type, base);
2314 step = fold_convert (type, step);
2318 for (i = 0; i < n_iv_cands (data); i++)
2320 cand = iv_cand (data, i);
2322 if (cand->pos != pos)
2323 continue;
2325 if (cand->incremented_at != incremented_at
2326 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2327 && cand->ainc_use != use))
2328 continue;
2330 if (!cand->iv)
2332 if (!base && !step)
2333 break;
2335 continue;
2338 if (!base && !step)
2339 continue;
2341 if (operand_equal_p (base, cand->iv->base, 0)
2342 && operand_equal_p (step, cand->iv->step, 0)
2343 && (TYPE_PRECISION (TREE_TYPE (base))
2344 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2345 break;
2348 if (i == n_iv_cands (data))
2350 cand = XCNEW (struct iv_cand);
2351 cand->id = i;
2353 if (!base && !step)
2354 cand->iv = NULL;
2355 else
2356 cand->iv = alloc_iv (base, step);
2358 cand->pos = pos;
2359 if (pos != IP_ORIGINAL && cand->iv)
2361 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2362 cand->var_after = cand->var_before;
2364 cand->important = important;
2365 cand->incremented_at = incremented_at;
2366 data->iv_candidates.safe_push (cand);
2368 if (step
2369 && TREE_CODE (step) != INTEGER_CST)
2371 fd_ivopts_data = data;
2372 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2375 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2376 cand->ainc_use = use;
2377 else
2378 cand->ainc_use = NULL;
2380 if (dump_file && (dump_flags & TDF_DETAILS))
2381 dump_cand (dump_file, cand);
2384 if (important && !cand->important)
2386 cand->important = true;
2387 if (dump_file && (dump_flags & TDF_DETAILS))
2388 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2391 if (use)
2393 bitmap_set_bit (use->related_cands, i);
2394 if (dump_file && (dump_flags & TDF_DETAILS))
2395 fprintf (dump_file, "Candidate %d is related to use %d\n",
2396 cand->id, use->id);
2399 return cand;
2402 /* Returns true if incrementing the induction variable at the end of the LOOP
2403 is allowed.
2405 The purpose is to avoid splitting the latch edge with a biv increment, thus
2406 creating a jump, possibly confusing other optimization passes and leaving
2407 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2408 is not available (so we do not have a better alternative), or if the latch
2409 edge is already nonempty. */
2411 static bool
2412 allow_ip_end_pos_p (struct loop *loop)
2414 if (!ip_normal_pos (loop))
2415 return true;
2417 if (!empty_block_p (ip_end_pos (loop)))
2418 return true;
2420 return false;
2423 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2424 The candidate's 'important' field is set to IMPORTANT. */
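/* For instance, on a target with post-increment addressing (a
   target-dependent assumption), a USE_ADDRESS use of a 4-byte mode
   whose iv has step 4 gets an IP_AFTER_USE candidate below, so the
   increment can later be expressed as a post-increment access.  */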
2426 static void
2427 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2428 bool important, struct iv_use *use)
2430 basic_block use_bb = gimple_bb (use->stmt);
2431 enum machine_mode mem_mode;
2432 unsigned HOST_WIDE_INT cstepi;
2434 /* If we insert the increment in any position other than the standard
2435 ones, we must ensure that it is incremented once per iteration.
2436 It must not be in an inner nested loop, or one side of an if
2437 statement. */
2438 if (use_bb->loop_father != data->current_loop
2439 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2440 || stmt_could_throw_p (use->stmt)
2441 || !cst_and_fits_in_hwi (step))
2442 return;
2444 cstepi = int_cst_value (step);
2446 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2447 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2448 || USE_STORE_PRE_INCREMENT (mem_mode))
2449 && GET_MODE_SIZE (mem_mode) == cstepi)
2450 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2451 || USE_STORE_PRE_DECREMENT (mem_mode))
2452 && GET_MODE_SIZE (mem_mode) == -cstepi))
2454 enum tree_code code = MINUS_EXPR;
2455 tree new_base;
2456 tree new_step = step;
2458 if (POINTER_TYPE_P (TREE_TYPE (base)))
2460 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2461 code = POINTER_PLUS_EXPR;
2463 else
2464 new_step = fold_convert (TREE_TYPE (base), new_step);
2465 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2466 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2467 use->stmt);
2469 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2470 || USE_STORE_POST_INCREMENT (mem_mode))
2471 && GET_MODE_SIZE (mem_mode) == cstepi)
2472 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2473 || USE_STORE_POST_DECREMENT (mem_mode))
2474 && GET_MODE_SIZE (mem_mode) == -cstepi))
2476 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2477 use->stmt);
2481 /* Adds a candidate BASE + STEP * i. The 'important' field is set to IMPORTANT
2482 and the position to POS. If USE is not NULL, the candidate is set as related to
2483 it. The candidate computation is scheduled on all available positions. */
2485 static void
2486 add_candidate (struct ivopts_data *data,
2487 tree base, tree step, bool important, struct iv_use *use)
2489 if (ip_normal_pos (data->current_loop))
2490 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2491 if (ip_end_pos (data->current_loop)
2492 && allow_ip_end_pos_p (data->current_loop))
2493 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2495 if (use != NULL && use->type == USE_ADDRESS)
2496 add_autoinc_candidates (data, base, step, important, use);
2499 /* Adds standard iv candidates. */
2501 static void
2502 add_standard_iv_candidates (struct ivopts_data *data)
2504 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2506 /* The same for the wider 'long' type if it is still fast enough. */
2507 if (TYPE_PRECISION
2508 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2509 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2510 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2511 build_int_cst (long_integer_type_node, 1), true, NULL);
2513 /* Likewise for 'long long' if it is still fast enough. */
2514 if (TYPE_PRECISION
2515 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2516 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2517 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2518 build_int_cst (long_long_integer_type_node, 1), true, NULL);
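/* On a typical LP64 target (an assumption), the above adds {0, +, 1}
   in 'int' and in 'long'; the 'long long' variant is skipped because
   it has the same precision as 'long'.  */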
2522 /* Adds candidates based on the old induction variable IV. */
2524 static void
2525 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2527 gimple phi;
2528 tree def;
2529 struct iv_cand *cand;
2531 add_candidate (data, iv->base, iv->step, true, NULL);
2533 /* The same, but with initial value zero. */
2534 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2535 add_candidate (data, size_int (0), iv->step, true, NULL);
2536 else
2537 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2538 iv->step, true, NULL);
2540 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2541 if (gimple_code (phi) == GIMPLE_PHI)
2543 /* Additionally record the possibility of leaving the original iv
2544 untouched. */
2545 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2546 /* Don't add candidate if it's from another PHI node because
2547 it's an affine iv appearing in the form of PEELED_CHREC. */
2548 phi = SSA_NAME_DEF_STMT (def);
2549 if (gimple_code (phi) != GIMPLE_PHI)
2551 cand = add_candidate_1 (data,
2552 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2553 SSA_NAME_DEF_STMT (def));
2554 cand->var_before = iv->ssa_name;
2555 cand->var_after = def;
2557 else
2558 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2562 /* Adds candidates based on the old induction variables. */
2564 static void
2565 add_old_ivs_candidates (struct ivopts_data *data)
2567 unsigned i;
2568 struct iv *iv;
2569 bitmap_iterator bi;
2571 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2573 iv = ver_info (data, i)->iv;
2574 if (iv && iv->biv_p && !integer_zerop (iv->step))
2575 add_old_iv_candidates (data, iv);
2579 /* Adds candidates based on the value of the induction variable IV and USE. */
2581 static void
2582 add_iv_value_candidates (struct ivopts_data *data,
2583 struct iv *iv, struct iv_use *use)
2585 unsigned HOST_WIDE_INT offset;
2586 tree base;
2587 tree basetype;
2589 add_candidate (data, iv->base, iv->step, false, use);
2591 /* The same, but with initial value zero. Make such a variable important,
2592 since it is generic enough so that possibly many uses may be based
2593 on it. */
2594 basetype = TREE_TYPE (iv->base);
2595 if (POINTER_TYPE_P (basetype))
2596 basetype = sizetype;
2597 add_candidate (data, build_int_cst (basetype, 0),
2598 iv->step, true, use);
2600 /* Third, try removing the constant offset. Make sure to even
2601 add a candidate for &a[0] vs. (T *)&a. */
2602 base = strip_offset (iv->base, &offset);
2603 if (offset
2604 || base != iv->base)
2605 add_candidate (data, base, iv->step, false, use);
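/* As an example, for a use whose iv is {&a[4], +, 4} (4-byte elements
   assumed), this adds the candidate {&a[4], +, 4}, the important
   candidate {0, +, 4} in sizetype, and the offset-stripped candidate
   {(T *)&a, +, 4}.  */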
2608 /* Adds candidates based on the uses. */
2610 static void
2611 add_derived_ivs_candidates (struct ivopts_data *data)
2613 unsigned i;
2615 for (i = 0; i < n_iv_uses (data); i++)
2617 struct iv_use *use = iv_use (data, i);
2619 if (!use)
2620 continue;
2622 switch (use->type)
2624 case USE_NONLINEAR_EXPR:
2625 case USE_COMPARE:
2626 case USE_ADDRESS:
2627 /* Just add the ivs based on the value of the iv used here. */
2628 add_iv_value_candidates (data, use->iv, use);
2629 break;
2631 default:
2632 gcc_unreachable ();
2637 /* Record important candidates and add them to related_cands bitmaps
2638 if needed. */
2640 static void
2641 record_important_candidates (struct ivopts_data *data)
2643 unsigned i;
2644 struct iv_use *use;
2646 for (i = 0; i < n_iv_cands (data); i++)
2648 struct iv_cand *cand = iv_cand (data, i);
2650 if (cand->important)
2651 bitmap_set_bit (data->important_candidates, i);
2654 data->consider_all_candidates = (n_iv_cands (data)
2655 <= CONSIDER_ALL_CANDIDATES_BOUND);
2657 if (data->consider_all_candidates)
2659 /* We will not need "related_cands" bitmaps in this case,
2660 so release them to decrease peak memory consumption. */
2661 for (i = 0; i < n_iv_uses (data); i++)
2663 use = iv_use (data, i);
2664 BITMAP_FREE (use->related_cands);
2667 else
2669 /* Add important candidates to the related_cands bitmaps. */
2670 for (i = 0; i < n_iv_uses (data); i++)
2671 bitmap_ior_into (iv_use (data, i)->related_cands,
2672 data->important_candidates);
2676 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2677 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2678 we allocate a simple array for every use. */
2680 static void
2681 alloc_use_cost_map (struct ivopts_data *data)
2683 unsigned i, size, s;
2685 for (i = 0; i < n_iv_uses (data); i++)
2687 struct iv_use *use = iv_use (data, i);
2689 if (data->consider_all_candidates)
2690 size = n_iv_cands (data);
2691 else
2693 s = bitmap_count_bits (use->related_cands);
2695 /* Round up to a power of two, so that computing the modulo is fast. */
2696 size = s ? (1 << ceil_log2 (s)) : 1;
2699 use->n_map_members = size;
2700 use->cost_map = XCNEWVEC (struct cost_pair, size);
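/* E.g. a use related to 5 candidates gets a cost map of
   1 << ceil_log2 (5) == 8 entries, so the "cand->id & 7" probe in
   set_use_iv_cost/get_use_iv_cost below wraps around cheaply.  */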
2704 /* Returns a description of the computation cost of an expression whose
2705 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2707 static comp_cost
2708 new_cost (unsigned runtime, unsigned complexity)
2710 comp_cost cost;
2712 cost.cost = runtime;
2713 cost.complexity = complexity;
2715 return cost;
2718 /* Adds costs COST1 and COST2. */
2720 static comp_cost
2721 add_costs (comp_cost cost1, comp_cost cost2)
2723 cost1.cost += cost2.cost;
2724 cost1.complexity += cost2.complexity;
2726 return cost1;
2728 /* Subtracts cost COST2 from COST1. */
2730 static comp_cost
2731 sub_costs (comp_cost cost1, comp_cost cost2)
2733 cost1.cost -= cost2.cost;
2734 cost1.complexity -= cost2.complexity;
2736 return cost1;
2739 /* Returns a negative number if COST1 < COST2, a positive number if
2740 COST1 > COST2, and 0 if COST1 = COST2. */
2742 static int
2743 compare_costs (comp_cost cost1, comp_cost cost2)
2745 if (cost1.cost == cost2.cost)
2746 return cost1.complexity - cost2.complexity;
2748 return cost1.cost - cost2.cost;
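/* Complexity only breaks ties: new_cost (4, 2) compares greater than
   new_cost (4, 1), but still less than new_cost (5, 0).  */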
2751 /* Returns true if COST is infinite. */
2753 static bool
2754 infinite_cost_p (comp_cost cost)
2756 return cost.cost == INFTY;
2759 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it depends
2760 on invariants DEPENDS_ON and that the value used in expressing it
2761 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2763 static void
2764 set_use_iv_cost (struct ivopts_data *data,
2765 struct iv_use *use, struct iv_cand *cand,
2766 comp_cost cost, bitmap depends_on, tree value,
2767 enum tree_code comp, int inv_expr_id)
2769 unsigned i, s;
2771 if (infinite_cost_p (cost))
2773 BITMAP_FREE (depends_on);
2774 return;
2777 if (data->consider_all_candidates)
2779 use->cost_map[cand->id].cand = cand;
2780 use->cost_map[cand->id].cost = cost;
2781 use->cost_map[cand->id].depends_on = depends_on;
2782 use->cost_map[cand->id].value = value;
2783 use->cost_map[cand->id].comp = comp;
2784 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2785 return;
2788 /* n_map_members is a power of two, so this computes modulo. */
2789 s = cand->id & (use->n_map_members - 1);
2790 for (i = s; i < use->n_map_members; i++)
2791 if (!use->cost_map[i].cand)
2792 goto found;
2793 for (i = 0; i < s; i++)
2794 if (!use->cost_map[i].cand)
2795 goto found;
2797 gcc_unreachable ();
2799 found:
2800 use->cost_map[i].cand = cand;
2801 use->cost_map[i].cost = cost;
2802 use->cost_map[i].depends_on = depends_on;
2803 use->cost_map[i].value = value;
2804 use->cost_map[i].comp = comp;
2805 use->cost_map[i].inv_expr_id = inv_expr_id;
2808 /* Gets cost of (USE, CANDIDATE) pair. */
2810 static struct cost_pair *
2811 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2812 struct iv_cand *cand)
2814 unsigned i, s;
2815 struct cost_pair *ret;
2817 if (!cand)
2818 return NULL;
2820 if (data->consider_all_candidates)
2822 ret = use->cost_map + cand->id;
2823 if (!ret->cand)
2824 return NULL;
2826 return ret;
2829 /* n_map_members is a power of two, so this computes modulo. */
2830 s = cand->id & (use->n_map_members - 1);
2831 for (i = s; i < use->n_map_members; i++)
2832 if (use->cost_map[i].cand == cand)
2833 return use->cost_map + i;
2834 else if (use->cost_map[i].cand == NULL)
2835 return NULL;
2836 for (i = 0; i < s; i++)
2837 if (use->cost_map[i].cand == cand)
2838 return use->cost_map + i;
2839 else if (use->cost_map[i].cand == NULL)
2840 return NULL;
2842 return NULL;
2845 /* Returns an estimate of the cost of computing SEQ. */
2847 static unsigned
2848 seq_cost (rtx_insn *seq, bool speed)
2850 unsigned cost = 0;
2851 rtx set;
2853 for (; seq; seq = NEXT_INSN (seq))
2855 set = single_set (seq);
2856 if (set)
2857 cost += set_src_cost (SET_SRC (set), speed);
2858 else
2859 cost++;
2862 return cost;
2865 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2866 static rtx
2867 produce_memory_decl_rtl (tree obj, int *regno)
2869 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2870 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2871 rtx x;
2873 gcc_assert (obj);
2874 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2876 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2877 x = gen_rtx_SYMBOL_REF (address_mode, name);
2878 SET_SYMBOL_REF_DECL (x, obj);
2879 x = gen_rtx_MEM (DECL_MODE (obj), x);
2880 set_mem_addr_space (x, as);
2881 targetm.encode_section_info (obj, x, true);
2883 else
2885 x = gen_raw_REG (address_mode, (*regno)++);
2886 x = gen_rtx_MEM (DECL_MODE (obj), x);
2887 set_mem_addr_space (x, as);
2890 return x;
2893 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2894 walk_tree. DATA contains the actual fake register number. */
2896 static tree
2897 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2899 tree obj = NULL_TREE;
2900 rtx x = NULL_RTX;
2901 int *regno = (int *) data;
2903 switch (TREE_CODE (*expr_p))
2905 case ADDR_EXPR:
2906 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2907 handled_component_p (*expr_p);
2908 expr_p = &TREE_OPERAND (*expr_p, 0))
2909 continue;
2910 obj = *expr_p;
2911 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2912 x = produce_memory_decl_rtl (obj, regno);
2913 break;
2915 case SSA_NAME:
2916 *ws = 0;
2917 obj = SSA_NAME_VAR (*expr_p);
2918 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2919 if (!obj)
2920 return NULL_TREE;
2921 if (!DECL_RTL_SET_P (obj))
2922 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2923 break;
2925 case VAR_DECL:
2926 case PARM_DECL:
2927 case RESULT_DECL:
2928 *ws = 0;
2929 obj = *expr_p;
2931 if (DECL_RTL_SET_P (obj))
2932 break;
2934 if (DECL_MODE (obj) == BLKmode)
2935 x = produce_memory_decl_rtl (obj, regno);
2936 else
2937 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2939 break;
2941 default:
2942 break;
2945 if (x)
2947 decl_rtl_to_reset.safe_push (obj);
2948 SET_DECL_RTL (obj, x);
2951 return NULL_TREE;
2954 /* Determines cost of the computation of EXPR. */
2956 static unsigned
2957 computation_cost (tree expr, bool speed)
2959 rtx_insn *seq;
2960 rtx rslt;
2961 tree type = TREE_TYPE (expr);
2962 unsigned cost;
2963 /* Avoid using hard regs in ways which may be unsupported. */
2964 int regno = LAST_VIRTUAL_REGISTER + 1;
2965 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2966 enum node_frequency real_frequency = node->frequency;
2968 node->frequency = NODE_FREQUENCY_NORMAL;
2969 crtl->maybe_hot_insn_p = speed;
2970 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2971 start_sequence ();
2972 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2973 seq = get_insns ();
2974 end_sequence ();
2975 default_rtl_profile ();
2976 node->frequency = real_frequency;
2978 cost = seq_cost (seq, speed);
2979 if (MEM_P (rslt))
2980 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2981 TYPE_ADDR_SPACE (type), speed);
2982 else if (!REG_P (rslt))
2983 cost += set_src_cost (rslt, speed);
2985 return cost;
2988 /* Returns variable containing the value of candidate CAND at statement AT. */
2990 static tree
2991 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2993 if (stmt_after_increment (loop, cand, stmt))
2994 return cand->var_after;
2995 else
2996 return cand->var_before;
2999 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3000 same precision that is at least as wide as the precision of TYPE, stores
3001 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3002 type of A and B. */
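/* E.g. if *A is (unsigned int) s and *B is (unsigned int) t, with S
   and T of a wider type such as 'long' (a 64-bit 'long' is assumed
   here), *A and *B are replaced by S and T and 'long' is returned,
   so the difference is folded in the wider type.  */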
3004 static tree
3005 determine_common_wider_type (tree *a, tree *b)
3007 tree wider_type = NULL;
3008 tree suba, subb;
3009 tree atype = TREE_TYPE (*a);
3011 if (CONVERT_EXPR_P (*a))
3013 suba = TREE_OPERAND (*a, 0);
3014 wider_type = TREE_TYPE (suba);
3015 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3016 return atype;
3018 else
3019 return atype;
3021 if (CONVERT_EXPR_P (*b))
3023 subb = TREE_OPERAND (*b, 0);
3024 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3025 return atype;
3027 else
3028 return atype;
3030 *a = suba;
3031 *b = subb;
3032 return wider_type;
3035 /* Determines the expression by which USE is expressed from induction variable
3036 CAND at statement AT in LOOP. The expression is stored in a decomposed
3037 form into AFF. Returns false if USE cannot be expressed using CAND. */
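/* For instance, expressing a use {4, +, 4} by the candidate {0, +, 1}
   yields the ratio 4 and the combination 4 + 4 * var.  */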
3039 static bool
3040 get_computation_aff (struct loop *loop,
3041 struct iv_use *use, struct iv_cand *cand, gimple at,
3042 struct aff_tree *aff)
3044 tree ubase = use->iv->base;
3045 tree ustep = use->iv->step;
3046 tree cbase = cand->iv->base;
3047 tree cstep = cand->iv->step, cstep_common;
3048 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3049 tree common_type, var;
3050 tree uutype;
3051 aff_tree cbase_aff, var_aff;
3052 widest_int rat;
3054 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3056 /* We do not have enough precision to express the values of the use. */
3057 return false;
3060 var = var_at_stmt (loop, cand, at);
3061 uutype = unsigned_type_for (utype);
3063 /* If the conversion is not noop, perform it. */
3064 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3066 cstep = fold_convert (uutype, cstep);
3067 cbase = fold_convert (uutype, cbase);
3068 var = fold_convert (uutype, var);
3071 if (!constant_multiple_of (ustep, cstep, &rat))
3072 return false;
3074 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3075 type, we achieve better folding by computing their difference in this
3076 wider type, and cast the result to UUTYPE. We do not need to worry about
3077 overflows, as all the arithmetics will in the end be performed in UUTYPE
3078 anyway. */
3079 common_type = determine_common_wider_type (&ubase, &cbase);
3081 /* use = ubase - ratio * cbase + ratio * var. */
3082 tree_to_aff_combination (ubase, common_type, aff);
3083 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3084 tree_to_aff_combination (var, uutype, &var_aff);
3086 /* We need to shift the value if we are after the increment. */
3087 if (stmt_after_increment (loop, cand, at))
3089 aff_tree cstep_aff;
3091 if (common_type != uutype)
3092 cstep_common = fold_convert (common_type, cstep);
3093 else
3094 cstep_common = cstep;
3096 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3097 aff_combination_add (&cbase_aff, &cstep_aff);
3100 aff_combination_scale (&cbase_aff, -rat);
3101 aff_combination_add (aff, &cbase_aff);
3102 if (common_type != uutype)
3103 aff_combination_convert (aff, uutype);
3105 aff_combination_scale (&var_aff, rat);
3106 aff_combination_add (aff, &var_aff);
3108 return true;
3111 /* Return the type of USE. */
3113 static tree
3114 get_use_type (struct iv_use *use)
3116 tree base_type = TREE_TYPE (use->iv->base);
3117 tree type;
3119 if (use->type == USE_ADDRESS)
3121 /* The base_type may be a void pointer. Create a pointer type based on
3122 the mem_ref instead. */
3123 type = build_pointer_type (TREE_TYPE (*use->op_p));
3124 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3125 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3127 else
3128 type = base_type;
3130 return type;
3133 /* Determines the expression by which USE is expressed from induction variable
3134 CAND at statement AT in LOOP. The computation is unshared. */
3136 static tree
3137 get_computation_at (struct loop *loop,
3138 struct iv_use *use, struct iv_cand *cand, gimple at)
3140 aff_tree aff;
3141 tree type = get_use_type (use);
3143 if (!get_computation_aff (loop, use, cand, at, &aff))
3144 return NULL_TREE;
3145 unshare_aff_combination (&aff);
3146 return fold_convert (type, aff_combination_to_tree (&aff));
3149 /* Determines the expression by which USE is expressed from induction variable
3150 CAND in LOOP. The computation is unshared. */
3152 static tree
3153 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3155 return get_computation_at (loop, use, cand, use->stmt);
3158 /* Adjust the cost COST for being in loop setup rather than loop body.
3159 If we're optimizing for space, the loop setup overhead is constant;
3160 if we're optimizing for speed, amortize it over the per-iteration cost. */
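/* E.g. a setup cost of 20 in a loop optimized for speed with an
   average of 10 iterations is charged as 2; when optimizing for
   size it remains 20.  */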
3161 static unsigned
3162 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3164 if (cost == INFTY)
3165 return cost;
3166 else if (optimize_loop_for_speed_p (data->current_loop))
3167 return cost / avg_loop_niter (data->current_loop);
3168 else
3169 return cost;
3172 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3173 validity for a memory reference accessing memory of mode MODE in
3174 address space AS. */
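/* On x86-64, for instance, this holds for the scales 2, 4 and 8
   supported by base + index * scale addressing, but not for, say,
   3 or 5 (a target-specific observation).  */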
3177 bool
3178 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3179 addr_space_t as)
3181 #define MAX_RATIO 128
3182 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3183 static vec<sbitmap> valid_mult_list;
3184 sbitmap valid_mult;
3186 if (data_index >= valid_mult_list.length ())
3187 valid_mult_list.safe_grow_cleared (data_index + 1);
3189 valid_mult = valid_mult_list[data_index];
3190 if (!valid_mult)
3192 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3193 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3194 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3195 rtx addr, scaled;
3196 HOST_WIDE_INT i;
3198 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3199 bitmap_clear (valid_mult);
3200 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3201 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3202 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3204 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3205 if (memory_address_addr_space_p (mode, addr, as)
3206 || memory_address_addr_space_p (mode, scaled, as))
3207 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3210 if (dump_file && (dump_flags & TDF_DETAILS))
3212 fprintf (dump_file, " allowed multipliers:");
3213 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3214 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3215 fprintf (dump_file, " %d", (int) i);
3216 fprintf (dump_file, "\n");
3217 fprintf (dump_file, "\n");
3220 valid_mult_list[data_index] = valid_mult;
3223 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3224 return false;
3226 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3229 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3230 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3231 variable is omitted. Compute the cost for a memory reference that accesses
3232 a memory location of mode MEM_MODE in address space AS.
3234 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3235 size of MEM_MODE / RATIO) is available. To make this determination, we
3236 look at the size of the increment to be made, which is given in CSTEP.
3237 CSTEP may be zero if the step is unknown.
3238 STMT_AFTER_INC is true iff the statement we're looking at is after the
3239 increment of the original biv.
3241 TODO -- there must be some better way. This all is quite crude. */
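/* E.g. once computed, the cost of the shape "sym + var + 4 + 2 * index"
   is looked up as data->costs[1][1][1][1], while a plain "reg + 4" is
   data->costs[0][0][1][0].  */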
3243 enum ainc_type
3245 AINC_PRE_INC, /* Pre increment. */
3246 AINC_PRE_DEC, /* Pre decrement. */
3247 AINC_POST_INC, /* Post increment. */
3248 AINC_POST_DEC, /* Post decrement. */
3249 AINC_NONE /* Also the number of auto increment types. */
3252 typedef struct address_cost_data_s
3254 HOST_WIDE_INT min_offset, max_offset;
3255 unsigned costs[2][2][2][2];
3256 unsigned ainc_costs[AINC_NONE];
3257 } *address_cost_data;
3260 static comp_cost
3261 get_address_cost (bool symbol_present, bool var_present,
3262 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3263 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3264 addr_space_t as, bool speed,
3265 bool stmt_after_inc, bool *may_autoinc)
3267 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3268 static vec<address_cost_data> address_cost_data_list;
3269 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3270 address_cost_data data;
3271 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3272 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3273 unsigned cost, acost, complexity;
3274 enum ainc_type autoinc_type;
3275 bool offset_p, ratio_p, autoinc;
3276 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3277 unsigned HOST_WIDE_INT mask;
3278 unsigned bits;
3280 if (data_index >= address_cost_data_list.length ())
3281 address_cost_data_list.safe_grow_cleared (data_index + 1);
3283 data = address_cost_data_list[data_index];
3284 if (!data)
3286 HOST_WIDE_INT i;
3287 HOST_WIDE_INT rat, off = 0;
3288 int old_cse_not_expected, width;
3289 unsigned sym_p, var_p, off_p, rat_p, add_c;
3290 rtx_insn *seq;
3291 rtx addr, base;
3292 rtx reg0, reg1;
3294 data = (address_cost_data) xcalloc (1, sizeof (*data));
3296 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3298 width = GET_MODE_BITSIZE (address_mode) - 1;
3299 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3300 width = HOST_BITS_PER_WIDE_INT - 1;
3301 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3303 for (i = width; i >= 0; i--)
3305 off = -((unsigned HOST_WIDE_INT) 1 << i);
3306 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3307 if (memory_address_addr_space_p (mem_mode, addr, as))
3308 break;
3310 data->min_offset = (i == -1? 0 : off);
3312 for (i = width; i >= 0; i--)
3314 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3315 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3316 if (memory_address_addr_space_p (mem_mode, addr, as))
3317 break;
3318 /* For some targets, like ARM Thumb-1, the offset must be naturally
3319 aligned. Try an aligned offset if address_mode is not QImode. */
3320 off = (address_mode == QImode)
3321 ? 0
3322 : ((unsigned HOST_WIDE_INT) 1 << i)
3323 - GET_MODE_SIZE (address_mode);
3324 if (off > 0)
3326 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3327 if (memory_address_addr_space_p (mem_mode, addr, as))
3328 break;
3331 if (i == -1)
3332 off = 0;
3333 data->max_offset = off;
3335 if (dump_file && (dump_flags & TDF_DETAILS))
3337 fprintf (dump_file, "get_address_cost:\n");
3338 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3339 GET_MODE_NAME (mem_mode),
3340 data->min_offset);
3341 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3342 GET_MODE_NAME (mem_mode),
3343 data->max_offset);
3346 rat = 1;
3347 for (i = 2; i <= MAX_RATIO; i++)
3348 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3350 rat = i;
3351 break;
3354 /* Compute the cost of various addressing modes. */
3355 acost = 0;
3356 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3357 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3359 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3360 || USE_STORE_PRE_DECREMENT (mem_mode))
3362 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3363 has_predec[mem_mode]
3364 = memory_address_addr_space_p (mem_mode, addr, as);
3366 if (has_predec[mem_mode])
3367 data->ainc_costs[AINC_PRE_DEC]
3368 = address_cost (addr, mem_mode, as, speed);
3370 if (USE_LOAD_POST_DECREMENT (mem_mode)
3371 || USE_STORE_POST_DECREMENT (mem_mode))
3373 addr = gen_rtx_POST_DEC (address_mode, reg0);
3374 has_postdec[mem_mode]
3375 = memory_address_addr_space_p (mem_mode, addr, as);
3377 if (has_postdec[mem_mode])
3378 data->ainc_costs[AINC_POST_DEC]
3379 = address_cost (addr, mem_mode, as, speed);
3381 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3382 || USE_STORE_PRE_INCREMENT (mem_mode))
3384 addr = gen_rtx_PRE_INC (address_mode, reg0);
3385 has_preinc[mem_mode]
3386 = memory_address_addr_space_p (mem_mode, addr, as);
3388 if (has_preinc[mem_mode])
3389 data->ainc_costs[AINC_PRE_INC]
3390 = address_cost (addr, mem_mode, as, speed);
3392 if (USE_LOAD_POST_INCREMENT (mem_mode)
3393 || USE_STORE_POST_INCREMENT (mem_mode))
3395 addr = gen_rtx_POST_INC (address_mode, reg0);
3396 has_postinc[mem_mode]
3397 = memory_address_addr_space_p (mem_mode, addr, as);
3399 if (has_postinc[mem_mode])
3400 data->ainc_costs[AINC_POST_INC]
3401 = address_cost (addr, mem_mode, as, speed);
3403 for (i = 0; i < 16; i++)
3405 sym_p = i & 1;
3406 var_p = (i >> 1) & 1;
3407 off_p = (i >> 2) & 1;
3408 rat_p = (i >> 3) & 1;
3410 addr = reg0;
3411 if (rat_p)
3412 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3413 gen_int_mode (rat, address_mode));
3415 if (var_p)
3416 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3418 if (sym_p)
3420 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3421 /* ??? We can run into trouble with some backends by presenting
3422 it with symbols which haven't been properly passed through
3423 targetm.encode_section_info. By setting the local bit, we
3424 enhance the probability of things working. */
3425 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3427 if (off_p)
3428 base = gen_rtx_fmt_e (CONST, address_mode,
3429 gen_rtx_fmt_ee
3430 (PLUS, address_mode, base,
3431 gen_int_mode (off, address_mode)));
3433 else if (off_p)
3434 base = gen_int_mode (off, address_mode);
3435 else
3436 base = NULL_RTX;
3438 if (base)
3439 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3441 start_sequence ();
3442 /* To avoid splitting addressing modes, pretend that no cse will
3443 follow. */
3444 old_cse_not_expected = cse_not_expected;
3445 cse_not_expected = true;
3446 addr = memory_address_addr_space (mem_mode, addr, as);
3447 cse_not_expected = old_cse_not_expected;
3448 seq = get_insns ();
3449 end_sequence ();
3451 acost = seq_cost (seq, speed);
3452 acost += address_cost (addr, mem_mode, as, speed);
3454 if (!acost)
3455 acost = 1;
3456 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3459 /* On some targets, it is quite expensive to load a symbol into a register,
3460 which makes addresses that contain symbols look much more expensive.
3461 However, the symbol will have to be loaded in any case before the
3462 loop (and quite likely we have it in a register already), so it does not
3463 make much sense to penalize them too heavily. So make some final
3464 tweaks for the SYMBOL_PRESENT modes:
3466 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3467 var is cheaper, use this mode with small penalty.
3468 If VAR_PRESENT is true, try whether the mode with
3469 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3470 if this is the case, use it. */
3471 add_c = add_cost (speed, address_mode);
3472 for (i = 0; i < 8; i++)
3474 var_p = i & 1;
3475 off_p = (i >> 1) & 1;
3476 rat_p = (i >> 2) & 1;
3478 acost = data->costs[0][1][off_p][rat_p] + 1;
3479 if (var_p)
3480 acost += add_c;
3482 if (acost < data->costs[1][var_p][off_p][rat_p])
3483 data->costs[1][var_p][off_p][rat_p] = acost;
3486 if (dump_file && (dump_flags & TDF_DETAILS))
3488 fprintf (dump_file, "Address costs:\n");
3490 for (i = 0; i < 16; i++)
3492 sym_p = i & 1;
3493 var_p = (i >> 1) & 1;
3494 off_p = (i >> 2) & 1;
3495 rat_p = (i >> 3) & 1;
3497 fprintf (dump_file, " ");
3498 if (sym_p)
3499 fprintf (dump_file, "sym + ");
3500 if (var_p)
3501 fprintf (dump_file, "var + ");
3502 if (off_p)
3503 fprintf (dump_file, "cst + ");
3504 if (rat_p)
3505 fprintf (dump_file, "rat * ");
3507 acost = data->costs[sym_p][var_p][off_p][rat_p];
3508 fprintf (dump_file, "index costs %d\n", acost);
3510 if (has_predec[mem_mode] || has_postdec[mem_mode]
3511 || has_preinc[mem_mode] || has_postinc[mem_mode])
3512 fprintf (dump_file, " May include autoinc/dec\n");
3513 fprintf (dump_file, "\n");
3516 address_cost_data_list[data_index] = data;
3519 bits = GET_MODE_BITSIZE (address_mode);
3520 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3521 offset &= mask;
3522 if ((offset >> (bits - 1) & 1))
3523 offset |= ~mask;
3524 s_offset = offset;
3526 autoinc = false;
3527 autoinc_type = AINC_NONE;
3528 msize = GET_MODE_SIZE (mem_mode);
3529 autoinc_offset = offset;
3530 if (stmt_after_inc)
3531 autoinc_offset += ratio * cstep;
3532 if (symbol_present || var_present || ratio != 1)
3533 autoinc = false;
3534 else
3536 if (has_postinc[mem_mode] && autoinc_offset == 0
3537 && msize == cstep)
3538 autoinc_type = AINC_POST_INC;
3539 else if (has_postdec[mem_mode] && autoinc_offset == 0
3540 && msize == -cstep)
3541 autoinc_type = AINC_POST_DEC;
3542 else if (has_preinc[mem_mode] && autoinc_offset == msize
3543 && msize == cstep)
3544 autoinc_type = AINC_PRE_INC;
3545 else if (has_predec[mem_mode] && autoinc_offset == -msize
3546 && msize == -cstep)
3547 autoinc_type = AINC_PRE_DEC;
3549 if (autoinc_type != AINC_NONE)
3550 autoinc = true;
3553 cost = 0;
3554 offset_p = (s_offset != 0
3555 && data->min_offset <= s_offset
3556 && s_offset <= data->max_offset);
3557 ratio_p = (ratio != 1
3558 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3560 if (ratio != 1 && !ratio_p)
3561 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3563 if (s_offset && !offset_p && !symbol_present)
3564 cost += add_cost (speed, address_mode);
3566 if (may_autoinc)
3567 *may_autoinc = autoinc;
3568 if (autoinc)
3569 acost = data->ainc_costs[autoinc_type];
3570 else
3571 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3572 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3573 return new_cost (cost + acost, complexity);
3576 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3577 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3578 calculating the operands of EXPR. Returns true if successful, and returns
3579 the cost in COST. */
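/* For example, for EXPR == a + b * 4 we have M == 2 and the cost of a
   shift-and-add insn (shiftadd_cost) applies; for a - b * 4 it is
   shiftsub1_cost, and for b * 4 - a shiftsub0_cost.  */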
3581 static bool
3582 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3583 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3585 comp_cost res;
3586 tree op1 = TREE_OPERAND (expr, 1);
3587 tree cst = TREE_OPERAND (mult, 1);
3588 tree multop = TREE_OPERAND (mult, 0);
3589 int m = exact_log2 (int_cst_value (cst));
3590 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3591 int sa_cost;
3592 bool equal_p = false;
3594 if (!(m >= 0 && m < maxm))
3595 return false;
3597 if (operand_equal_p (op1, mult, 0))
3598 equal_p = true;
3600 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3601 ? shiftadd_cost (speed, mode, m)
3602 : (equal_p
3603 ? shiftsub1_cost (speed, mode, m)
3604 : shiftsub0_cost (speed, mode, m)));
3605 res = new_cost (sa_cost, 0);
3606 res = add_costs (res, equal_p ? cost0 : cost1);
3608 STRIP_NOPS (multop);
3609 if (!is_gimple_val (multop))
3610 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3612 *cost = res;
3613 return true;
3616 /* Estimates cost of forcing expression EXPR into a variable. */
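/* E.g. a bare SSA name costs nothing, an integer constant costs
   integer_cost[speed], and the address of a static variable costs
   symbol_cost[speed]; these unit costs are measured once below by
   expanding sample expressions to RTL.  */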
3618 static comp_cost
3619 force_expr_to_var_cost (tree expr, bool speed)
3621 static bool costs_initialized = false;
3622 static unsigned integer_cost [2];
3623 static unsigned symbol_cost [2];
3624 static unsigned address_cost [2];
3625 tree op0, op1;
3626 comp_cost cost0, cost1, cost;
3627 enum machine_mode mode;
3629 if (!costs_initialized)
3631 tree type = build_pointer_type (integer_type_node);
3632 tree var, addr;
3633 rtx x;
3634 int i;
3636 var = create_tmp_var_raw (integer_type_node, "test_var");
3637 TREE_STATIC (var) = 1;
3638 x = produce_memory_decl_rtl (var, NULL);
3639 SET_DECL_RTL (var, x);
3641 addr = build1 (ADDR_EXPR, type, var);
3644 for (i = 0; i < 2; i++)
3646 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3647 2000), i);
3649 symbol_cost[i] = computation_cost (addr, i) + 1;
3651 address_cost[i]
3652 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3653 if (dump_file && (dump_flags & TDF_DETAILS))
3655 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3656 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3657 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3658 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3659 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3660 fprintf (dump_file, "\n");
3664 costs_initialized = true;
3667 STRIP_NOPS (expr);
3669 if (SSA_VAR_P (expr))
3670 return no_cost;
3672 if (is_gimple_min_invariant (expr))
3674 if (TREE_CODE (expr) == INTEGER_CST)
3675 return new_cost (integer_cost [speed], 0);
3677 if (TREE_CODE (expr) == ADDR_EXPR)
3679 tree obj = TREE_OPERAND (expr, 0);
3681 if (TREE_CODE (obj) == VAR_DECL
3682 || TREE_CODE (obj) == PARM_DECL
3683 || TREE_CODE (obj) == RESULT_DECL)
3684 return new_cost (symbol_cost [speed], 0);
3687 return new_cost (address_cost [speed], 0);
3690 switch (TREE_CODE (expr))
3692 case POINTER_PLUS_EXPR:
3693 case PLUS_EXPR:
3694 case MINUS_EXPR:
3695 case MULT_EXPR:
3696 op0 = TREE_OPERAND (expr, 0);
3697 op1 = TREE_OPERAND (expr, 1);
3698 STRIP_NOPS (op0);
3699 STRIP_NOPS (op1);
3700 break;
3702 CASE_CONVERT:
3703 case NEGATE_EXPR:
3704 op0 = TREE_OPERAND (expr, 0);
3705 STRIP_NOPS (op0);
3706 op1 = NULL_TREE;
3707 break;
3709 default:
3710 /* Just an arbitrary value, FIXME. */
3711 return new_cost (target_spill_cost[speed], 0);
3714 if (op0 == NULL_TREE
3715 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3716 cost0 = no_cost;
3717 else
3718 cost0 = force_expr_to_var_cost (op0, speed);
3720 if (op1 == NULL_TREE
3721 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3722 cost1 = no_cost;
3723 else
3724 cost1 = force_expr_to_var_cost (op1, speed);
3726 mode = TYPE_MODE (TREE_TYPE (expr));
3727 switch (TREE_CODE (expr))
3729 case POINTER_PLUS_EXPR:
3730 case PLUS_EXPR:
3731 case MINUS_EXPR:
3732 case NEGATE_EXPR:
3733 cost = new_cost (add_cost (speed, mode), 0);
3734 if (TREE_CODE (expr) != NEGATE_EXPR)
3736 tree mult = NULL_TREE;
3737 comp_cost sa_cost;
3738 if (TREE_CODE (op1) == MULT_EXPR)
3739 mult = op1;
3740 else if (TREE_CODE (op0) == MULT_EXPR)
3741 mult = op0;
3743 if (mult != NULL_TREE
3744 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3745 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3746 speed, &sa_cost))
3747 return sa_cost;
3749 break;
3751 CASE_CONVERT:
3753 tree inner_mode, outer_mode;
3754 outer_mode = TREE_TYPE (expr);
3755 inner_mode = TREE_TYPE (op0);
3756 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3757 TYPE_MODE (inner_mode), speed), 0);
3759 break;
3761 case MULT_EXPR:
3762 if (cst_and_fits_in_hwi (op0))
3763 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3764 mode, speed), 0);
3765 else if (cst_and_fits_in_hwi (op1))
3766 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3767 mode, speed), 0);
3768 else
3769 return new_cost (target_spill_cost [speed], 0);
3770 break;
3772 default:
3773 gcc_unreachable ();
3776 cost = add_costs (cost, cost0);
3777 cost = add_costs (cost, cost1);
3779 /* Bound the cost by target_spill_cost. The parts of complicated
3780 computations often are either loop invariant or at least can
3781 be shared between several iv uses, so letting this grow without
3782 limits would not give reasonable results. */
3783 if (cost.cost > (int) target_spill_cost [speed])
3784 cost.cost = target_spill_cost [speed];
3786 return cost;
3789 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3790 invariants the computation depends on. */
3792 static comp_cost
3793 force_var_cost (struct ivopts_data *data,
3794 tree expr, bitmap *depends_on)
3796 if (depends_on)
3798 fd_ivopts_data = data;
3799 walk_tree (&expr, find_depends, depends_on, NULL);
3802 return force_expr_to_var_cost (expr, data->speed);
3805 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3806 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3807 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3808 invariants the computation depends on. */
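/* For instance, for ADDR == g[2] where g is a file-static array with
   4-byte elements (the declaration is assumed), CORE is g, so
   *SYMBOL_PRESENT is set and *OFFSET grows by 8 at no extra cost.  */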
3810 static comp_cost
3811 split_address_cost (struct ivopts_data *data,
3812 tree addr, bool *symbol_present, bool *var_present,
3813 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3815 tree core;
3816 HOST_WIDE_INT bitsize;
3817 HOST_WIDE_INT bitpos;
3818 tree toffset;
3819 enum machine_mode mode;
3820 int unsignedp, volatilep;
3822 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3823 &unsignedp, &volatilep, false);
3825 if (toffset != 0
3826 || bitpos % BITS_PER_UNIT != 0
3827 || TREE_CODE (core) != VAR_DECL)
3829 *symbol_present = false;
3830 *var_present = true;
3831 fd_ivopts_data = data;
3832 walk_tree (&addr, find_depends, depends_on, NULL);
3833 return new_cost (target_spill_cost[data->speed], 0);
3836 *offset += bitpos / BITS_PER_UNIT;
3837 if (TREE_STATIC (core)
3838 || DECL_EXTERNAL (core))
3840 *symbol_present = true;
3841 *var_present = false;
3842 return no_cost;
3845 *symbol_present = false;
3846 *var_present = true;
3847 return no_cost;
3850 /* Estimates cost of expressing difference of addresses E1 - E2 as
3851 var + symbol + offset. The value of offset is added to OFFSET,
3852 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3853 part is missing. DEPENDS_ON is a set of the invariants the computation
3854 depends on. */
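/* E.g. E1 == &a[4] and E2 == &a have a compile-time constant
   difference: *OFFSET grows by 16 (4-byte elements assumed) and the
   cost is zero.  */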
3856 static comp_cost
3857 ptr_difference_cost (struct ivopts_data *data,
3858 tree e1, tree e2, bool *symbol_present, bool *var_present,
3859 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3861 HOST_WIDE_INT diff = 0;
3862 aff_tree aff_e1, aff_e2;
3863 tree type;
3865 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3867 if (ptr_difference_const (e1, e2, &diff))
3869 *offset += diff;
3870 *symbol_present = false;
3871 *var_present = false;
3872 return no_cost;
3875 if (integer_zerop (e2))
3876 return split_address_cost (data, TREE_OPERAND (e1, 0),
3877 symbol_present, var_present, offset, depends_on);
3879 *symbol_present = false;
3880 *var_present = true;
3882 type = signed_type_for (TREE_TYPE (e1));
3883 tree_to_aff_combination (e1, type, &aff_e1);
3884 tree_to_aff_combination (e2, type, &aff_e2);
3885 aff_combination_scale (&aff_e2, -1);
3886 aff_combination_add (&aff_e1, &aff_e2);
3888 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3891 /* Estimates cost of expressing difference E1 - E2 as
3892 var + symbol + offset. The value of offset is added to OFFSET,
3893 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3894 part is missing. DEPENDS_ON is a set of the invariants the computation
3895 depends on. */
3897 static comp_cost
3898 difference_cost (struct ivopts_data *data,
3899 tree e1, tree e2, bool *symbol_present, bool *var_present,
3900 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3902 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3903 unsigned HOST_WIDE_INT off1, off2;
3904 aff_tree aff_e1, aff_e2;
3905 tree type;
3907 e1 = strip_offset (e1, &off1);
3908 e2 = strip_offset (e2, &off2);
3909 *offset += off1 - off2;
3911 STRIP_NOPS (e1);
3912 STRIP_NOPS (e2);
3914 if (TREE_CODE (e1) == ADDR_EXPR)
3915 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3916 offset, depends_on);
3917 *symbol_present = false;
3919 if (operand_equal_p (e1, e2, 0))
3921 *var_present = false;
3922 return no_cost;
3925 *var_present = true;
3927 if (integer_zerop (e2))
3928 return force_var_cost (data, e1, depends_on);
3930 if (integer_zerop (e1))
3932 comp_cost cost = force_var_cost (data, e2, depends_on);
3933 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3934 return cost;
3937 type = signed_type_for (TREE_TYPE (e1));
3938 tree_to_aff_combination (e1, type, &aff_e1);
3939 tree_to_aff_combination (e2, type, &aff_e2);
3940 aff_combination_scale (&aff_e2, -1);
3941 aff_combination_add (&aff_e1, &aff_e2);
3943 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3946 /* Returns true if AFF1 and AFF2 are identical. */
3948 static bool
3949 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3951 unsigned i;
3953 if (aff1->n != aff2->n)
3954 return false;
3956 for (i = 0; i < aff1->n; i++)
3958 if (aff1->elts[i].coef != aff2->elts[i].coef)
3959 return false;
3961 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3962 return false;
3964 return true;
3967 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3969 static int
3970 get_expr_id (struct ivopts_data *data, tree expr)
3972 struct iv_inv_expr_ent ent;
3973 struct iv_inv_expr_ent **slot;
3975 ent.expr = expr;
3976 ent.hash = iterative_hash_expr (expr, 0);
3977 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3978 if (*slot)
3979 return (*slot)->id;
3981 *slot = XNEW (struct iv_inv_expr_ent);
3982 (*slot)->expr = expr;
3983 (*slot)->hash = ent.hash;
3984 (*slot)->id = data->inv_expr_id++;
3985 return (*slot)->id;
3988 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3989 requires a new compiler generated temporary. Returns -1 otherwise.
3990 ADDRESS_P is a flag indicating if the expression is for address
3991 computation. */
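/* For example, UBASE == p + 16 and CBASE == p with RATIO == 1 need no
   temporary (the constant parts are stripped and the rest is equal),
   so -1 is returned; unrelated bases p and q yield the combined
   expression p - q and get a fresh id.  */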
3993 static int
3994 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3995 tree cbase, HOST_WIDE_INT ratio,
3996 bool address_p)
3998 aff_tree ubase_aff, cbase_aff;
3999 tree expr, ub, cb;
4001 STRIP_NOPS (ubase);
4002 STRIP_NOPS (cbase);
4003 ub = ubase;
4004 cb = cbase;
4006 if ((TREE_CODE (ubase) == INTEGER_CST)
4007 && (TREE_CODE (cbase) == INTEGER_CST))
4008 return -1;
4010 /* Strips the constant part. */
4011 if (TREE_CODE (ubase) == PLUS_EXPR
4012 || TREE_CODE (ubase) == MINUS_EXPR
4013 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4015 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4016 ubase = TREE_OPERAND (ubase, 0);
4019 /* Strips the constant part. */
4020 if (TREE_CODE (cbase) == PLUS_EXPR
4021 || TREE_CODE (cbase) == MINUS_EXPR
4022 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4024 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4025 cbase = TREE_OPERAND (cbase, 0);
4028 if (address_p)
4030 if (((TREE_CODE (ubase) == SSA_NAME)
4031 || (TREE_CODE (ubase) == ADDR_EXPR
4032 && is_gimple_min_invariant (ubase)))
4033 && (TREE_CODE (cbase) == INTEGER_CST))
4034 return -1;
4036 if (((TREE_CODE (cbase) == SSA_NAME)
4037 || (TREE_CODE (cbase) == ADDR_EXPR
4038 && is_gimple_min_invariant (cbase)))
4039 && (TREE_CODE (ubase) == INTEGER_CST))
4040 return -1;
4043 if (ratio == 1)
4045 if (operand_equal_p (ubase, cbase, 0))
4046 return -1;
4048 if (TREE_CODE (ubase) == ADDR_EXPR
4049 && TREE_CODE (cbase) == ADDR_EXPR)
4051 tree usym, csym;
4053 usym = TREE_OPERAND (ubase, 0);
4054 csym = TREE_OPERAND (cbase, 0);
4055 if (TREE_CODE (usym) == ARRAY_REF)
4057 tree ind = TREE_OPERAND (usym, 1);
4058 if (TREE_CODE (ind) == INTEGER_CST
4059 && tree_fits_shwi_p (ind)
4060 && tree_to_shwi (ind) == 0)
4061 usym = TREE_OPERAND (usym, 0);
4063 if (TREE_CODE (csym) == ARRAY_REF)
4065 tree ind = TREE_OPERAND (csym, 1);
4066 if (TREE_CODE (ind) == INTEGER_CST
4067 && tree_fits_shwi_p (ind)
4068 && tree_to_shwi (ind) == 0)
4069 csym = TREE_OPERAND (csym, 0);
4071 if (operand_equal_p (usym, csym, 0))
4072 return -1;
4074 /* Now do a more complex comparison. */
4075 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4076 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4077 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4078 return -1;
4081 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4082 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4084 aff_combination_scale (&cbase_aff, -1 * ratio);
4085 aff_combination_add (&ubase_aff, &cbase_aff);
4086 expr = aff_combination_to_tree (&ubase_aff);
4087 return get_expr_id (data, expr);
4092 /* Determines the cost of the computation by which USE is expressed
4093 from induction variable CAND. If ADDRESS_P is true, we just need
4094 to create an address from it, otherwise we want to get it into a
4095 register. A set of invariants we depend on is stored in
4096 DEPENDS_ON. AT is the statement at which the value is computed.
4097 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4098 addressing is likely. */
4100 static comp_cost
4101 get_computation_cost_at (struct ivopts_data *data,
4102 struct iv_use *use, struct iv_cand *cand,
4103 bool address_p, bitmap *depends_on, gimple at,
4104 bool *can_autoinc,
4105 int *inv_expr_id)
4107 tree ubase = use->iv->base, ustep = use->iv->step;
4108 tree cbase, cstep;
4109 tree utype = TREE_TYPE (ubase), ctype;
4110 unsigned HOST_WIDE_INT cstepi, offset = 0;
4111 HOST_WIDE_INT ratio, aratio;
4112 bool var_present, symbol_present, stmt_is_after_inc;
4113 comp_cost cost;
4114 widest_int rat;
4115 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4116 enum machine_mode mem_mode = (address_p
4117 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4118 : VOIDmode);
4120 *depends_on = NULL;
4122 /* Only consider real candidates. */
4123 if (!cand->iv)
4124 return infinite_cost;
4126 cbase = cand->iv->base;
4127 cstep = cand->iv->step;
4128 ctype = TREE_TYPE (cbase);
4130 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4132 /* We do not have enough precision to express the values of the use. */
4133 return infinite_cost;
4136 if (address_p
4137 || (use->iv->base_object
4138 && cand->iv->base_object
4139 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4140 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4142 /* Do not try to express the address of an object with a computation based
4143 on the address of a different object. This may cause problems in rtl
4144 level alias analysis (that does not expect this to be happening,
4145 as this is illegal in C), and would be unlikely to be useful
4146 anyway. */
4147 if (use->iv->base_object
4148 && cand->iv->base_object
4149 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4150 return infinite_cost;
4153 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4155 /* TODO -- add direct handling of this case. */
4156 goto fallback;
4159 /* CSTEPI is removed from the offset in case the statement is after the
4160 increment. If the step is not constant, we use zero instead.
4161 This is a bit imprecise (there is the extra addition), but
4162 redundancy elimination is likely to transform the code so that
4163 it uses the value of the variable before the increment anyway,
4164 so it is not that unrealistic. */
4165 if (cst_and_fits_in_hwi (cstep))
4166 cstepi = int_cst_value (cstep);
4167 else
4168 cstepi = 0;
4170 if (!constant_multiple_of (ustep, cstep, &rat))
4171 return infinite_cost;
4173 if (wi::fits_shwi_p (rat))
4174 ratio = rat.to_shwi ();
4175 else
4176 return infinite_cost;
4178 STRIP_NOPS (cbase);
4179 ctype = TREE_TYPE (cbase);
4181 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4183 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4184 or ratio == 1, it is better to handle this like
4186 ubase - ratio * cbase + ratio * var
4188 (this also holds in the case ratio == -1, TODO). */
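/* A worked example with made-up values: for ubase == 16, cbase == 4 and
   ratio == 2, the use becomes (16 - 2 * 4) + 2 * var == 8 + 2 * var, so
   only the constant 8 and one multiplication remain to be costed.  */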
4190 if (cst_and_fits_in_hwi (cbase))
4192 offset = - ratio * int_cst_value (cbase);
4193 cost = difference_cost (data,
4194 ubase, build_int_cst (utype, 0),
4195 &symbol_present, &var_present, &offset,
4196 depends_on);
4197 cost.cost /= avg_loop_niter (data->current_loop);
4199 else if (ratio == 1)
4201 tree real_cbase = cbase;
4203 /* Check to see if any adjustment is needed. */
4204 if (cstepi == 0 && stmt_is_after_inc)
4206 aff_tree real_cbase_aff;
4207 aff_tree cstep_aff;
4209 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4210 &real_cbase_aff);
4211 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4213 aff_combination_add (&real_cbase_aff, &cstep_aff);
4214 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4217 cost = difference_cost (data,
4218 ubase, real_cbase,
4219 &symbol_present, &var_present, &offset,
4220 depends_on);
4221 cost.cost /= avg_loop_niter (data->current_loop);
4223 else if (address_p
4224 && !POINTER_TYPE_P (ctype)
4225 && multiplier_allowed_in_address_p
4226 (ratio, mem_mode,
4227 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4229 cbase
4230 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4231 cost = difference_cost (data,
4232 ubase, cbase,
4233 &symbol_present, &var_present, &offset,
4234 depends_on);
4235 cost.cost /= avg_loop_niter (data->current_loop);
4237 else
4239 cost = force_var_cost (data, cbase, depends_on);
4240 cost = add_costs (cost,
4241 difference_cost (data,
4242 ubase, build_int_cst (utype, 0),
4243 &symbol_present, &var_present,
4244 &offset, depends_on));
4245 cost.cost /= avg_loop_niter (data->current_loop);
4246 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4249 if (inv_expr_id)
4251 *inv_expr_id =
4252 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4253 /* Clear depends on. */
4254 if (*inv_expr_id != -1 && depends_on && *depends_on)
4255 bitmap_clear (*depends_on);
4258 /* If we are after the increment, the value of the candidate is higher by
4259 one iteration. */
4260 if (stmt_is_after_inc)
4261 offset -= ratio * cstepi;
4263 /* Now the computation is of the shape symbol + var1 + const + ratio * var2
4264 (the symbol/var1/const parts may be omitted). If we are looking for an
4265 address, find the cost of addressing this. */
4266 if (address_p)
4267 return add_costs (cost,
4268 get_address_cost (symbol_present, var_present,
4269 offset, ratio, cstepi,
4270 mem_mode,
4271 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4272 speed, stmt_is_after_inc,
4273 can_autoinc));
4275 /* Otherwise estimate the costs for computing the expression. */
4276 if (!symbol_present && !var_present && !offset)
4278 if (ratio != 1)
4279 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4280 return cost;
4283 /* Symbol + offset should be compile-time computable, so consider that they
4284 are added once to the variable, if present. */
4285 if (var_present && (symbol_present || offset))
4286 cost.cost += adjust_setup_cost (data,
4287 add_cost (speed, TYPE_MODE (ctype)));
4289 /* Having offset does not affect runtime cost in case it is added to
4290 symbol, but it increases complexity. */
4291 if (offset)
4292 cost.complexity++;
4294 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4296 aratio = ratio > 0 ? ratio : -ratio;
4297 if (aratio != 1)
4298 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4299 return cost;
4301 fallback:
4302 if (can_autoinc)
4303 *can_autoinc = false;
4306 /* Just get the expression, expand it and measure the cost. */
4307 tree comp = get_computation_at (data->current_loop, use, cand, at);
4309 if (!comp)
4310 return infinite_cost;
4312 if (address_p)
4313 comp = build_simple_mem_ref (comp);
4315 return new_cost (computation_cost (comp, speed), 0);
4319 /* Determines the cost of the computation by which USE is expressed
4320 from induction variable CAND. If ADDRESS_P is true, we just need
4321 to create an address from it, otherwise we want to get it into a
4322 register. A set of invariants we depend on is stored in
4323 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4324 autoinc addressing is likely. */
4326 static comp_cost
4327 get_computation_cost (struct ivopts_data *data,
4328 struct iv_use *use, struct iv_cand *cand,
4329 bool address_p, bitmap *depends_on,
4330 bool *can_autoinc, int *inv_expr_id)
4332 return get_computation_cost_at (data,
4333 use, cand, address_p, depends_on, use->stmt,
4334 can_autoinc, inv_expr_id);
4337 /* Determines cost of basing replacement of USE on CAND in a generic
4338 expression. */
4340 static bool
4341 determine_use_iv_cost_generic (struct ivopts_data *data,
4342 struct iv_use *use, struct iv_cand *cand)
4344 bitmap depends_on;
4345 comp_cost cost;
4346 int inv_expr_id = -1;
4348 /* The simple case first -- if we need to express value of the preserved
4349 original biv, the cost is 0. This also prevents us from counting the
4350 cost of increment twice -- once at this use and once in the cost of
4351 the candidate. */
4352 if (cand->pos == IP_ORIGINAL
4353 && cand->incremented_at == use->stmt)
4355 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4356 ERROR_MARK, -1);
4357 return true;
4360 cost = get_computation_cost (data, use, cand, false, &depends_on,
4361 NULL, &inv_expr_id);
4363 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4364 inv_expr_id);
4366 return !infinite_cost_p (cost);
4369 /* Determines cost of basing replacement of USE on CAND in an address. */
4371 static bool
4372 determine_use_iv_cost_address (struct ivopts_data *data,
4373 struct iv_use *use, struct iv_cand *cand)
4375 bitmap depends_on;
4376 bool can_autoinc;
4377 int inv_expr_id = -1;
4378 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4379 &can_autoinc, &inv_expr_id);
4381 if (cand->ainc_use == use)
4383 if (can_autoinc)
4384 cost.cost -= cand->cost_step;
4385 /* If we generated the candidate solely for exploiting autoincrement
4386 opportunities, and it turns out it can't be used, set the cost to
4387 infinity to make sure we ignore it. */
4388 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4389 cost = infinite_cost;
4391 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4392 inv_expr_id);
4394 return !infinite_cost_p (cost);
4397 /* Computes value of candidate CAND at position AT in iteration NITER, and
4398 stores it to VAL. */
4400 static void
4401 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4402 aff_tree *val)
4404 aff_tree step, delta, nit;
4405 struct iv *iv = cand->iv;
4406 tree type = TREE_TYPE (iv->base);
4407 tree steptype = type;
4408 if (POINTER_TYPE_P (type))
4409 steptype = sizetype;
4410 steptype = unsigned_type_for (type);
4412 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4413 aff_combination_convert (&step, steptype);
4414 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4415 aff_combination_convert (&nit, steptype);
4416 aff_combination_mult (&nit, &step, &delta);
4417 if (stmt_after_increment (loop, cand, at))
4418 aff_combination_add (&delta, &step);
4420 tree_to_aff_combination (iv->base, type, val);
4421 if (!POINTER_TYPE_P (type))
4422 aff_combination_convert (val, steptype);
4423 aff_combination_add (val, &delta);
4426 /* Returns the period of the induction variable IV. */
4428 static tree
4429 iv_period (struct iv *iv)
4431 tree step = iv->step, period, type;
4432 tree pow2div;
4434 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4436 type = unsigned_type_for (TREE_TYPE (step));
4437 /* The period of the iv is lcm (step, type_range)/step - 1,
4438 i.e., N*type_range/step - 1. Since the type range is a power
4439 of two, N == step >> num_of_ending_zeros_binary (step),
4440 so the final result is
4442 (type_range >> num_of_ending_zeros_binary (step)) - 1. */
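/* For example, for a 32-bit unsigned iv with step == 12 we have
   num_ending_zeros (step) == 2, so the period is (2^32 >> 2) - 1
   == 0x3fffffff: only after that many increments can the iv return
   to a value it has already taken.  */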
4445 pow2div = num_ending_zeros (step);
4447 period = build_low_bits_mask (type,
4448 (TYPE_PRECISION (type)
4449 - tree_to_uhwi (pow2div)));
4451 return period;
4454 /* Returns the comparison operator used when eliminating the iv USE. */
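/* E.g., if the exit condition reads "if (cond) goto out_of_loop;", the
   exit edge is the true edge and the eliminated test becomes iv == bound;
   if the true edge stays inside the loop, iv != bound is used instead, so
   that the rewritten condition is true exactly when the loop exits.  */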
4456 static enum tree_code
4457 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4459 struct loop *loop = data->current_loop;
4460 basic_block ex_bb;
4461 edge exit;
4463 ex_bb = gimple_bb (use->stmt);
4464 exit = EDGE_SUCC (ex_bb, 0);
4465 if (flow_bb_inside_loop_p (loop, exit->dest))
4466 exit = EDGE_SUCC (ex_bb, 1);
4468 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4471 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4472 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4473 calculation is performed in a non-wrapping type.
4475 TODO: More generally, we could test for the situation that
4476 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4477 This would require knowing the sign of OFFSET. */
4479 static bool
4480 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4482 enum tree_code code;
4483 tree e1, e2;
4484 aff_tree aff_e1, aff_e2, aff_offset;
4486 if (!nowrap_type_p (TREE_TYPE (base)))
4487 return false;
4489 base = expand_simple_operations (base);
4491 if (TREE_CODE (base) == SSA_NAME)
4493 gimple stmt = SSA_NAME_DEF_STMT (base);
4495 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4496 return false;
4498 code = gimple_assign_rhs_code (stmt);
4499 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4500 return false;
4502 e1 = gimple_assign_rhs1 (stmt);
4503 e2 = gimple_assign_rhs2 (stmt);
4505 else
4507 code = TREE_CODE (base);
4508 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4509 return false;
4510 e1 = TREE_OPERAND (base, 0);
4511 e2 = TREE_OPERAND (base, 1);
4514 /* Use affine expansion as a deeper inspection to prove the equality. */
4515 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4516 &aff_e2, &data->name_expansion_cache);
4517 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4518 &aff_offset, &data->name_expansion_cache);
4519 aff_combination_scale (&aff_offset, -1);
4520 switch (code)
4522 case PLUS_EXPR:
4523 aff_combination_add (&aff_e2, &aff_offset);
4524 if (aff_combination_zero_p (&aff_e2))
4525 return true;
4527 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4528 &aff_e1, &data->name_expansion_cache);
4529 aff_combination_add (&aff_e1, &aff_offset);
4530 return aff_combination_zero_p (&aff_e1);
4532 case POINTER_PLUS_EXPR:
4533 aff_combination_add (&aff_e2, &aff_offset);
4534 return aff_combination_zero_p (&aff_e2);
4536 default:
4537 return false;
4541 /* Tries to replace the loop exit condition by one formulated in terms of
4542 a LT_EXPR comparison with CAND. NITER describes the number of iterations
4543 of the loop. If successful, the comparison in COMP_P is altered accordingly.
4545 We aim to handle the following situation:
4547 sometype *base, *p;
4548 int a, b, i;
4550 i = a;
4551 p = p_0 = base + a;
4553 do
4554 {
4555 bla (*p);
4556 p++;
4557 i++;
4558 }
4559 while (i < b);
4561 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4562 We aim to optimize this to
4564 p = p_0 = base + a;
4565 do
4566 {
4567 bla (*p);
4568 p++;
4569 }
4570 while (p < p_0 - a + b);
4572 This preserves correctness, since the pointer arithmetic does not
4573 overflow. More precisely:
4575 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4576 overflow in computing it or the values of p.
4577 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4578 overflow. To prove this, we use the fact that p_0 = base + a. */
4580 static bool
4581 iv_elimination_compare_lt (struct ivopts_data *data,
4582 struct iv_cand *cand, enum tree_code *comp_p,
4583 struct tree_niter_desc *niter)
4585 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4586 struct aff_tree nit, tmpa, tmpb;
4587 enum tree_code comp;
4588 HOST_WIDE_INT step;
4590 /* We need to know that the candidate induction variable does not overflow.
4591 While more complex analysis may be used to prove this, for now just
4592 check that the variable appears in the original program and that it
4593 is computed in a type that guarantees no overflows. */
4594 cand_type = TREE_TYPE (cand->iv->base);
4595 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4596 return false;
4598 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4599 the calculation of the BOUND could overflow, making the comparison
4600 invalid. */
4601 if (!data->loop_single_exit_p)
4602 return false;
4604 /* We need to be able to decide whether the candidate is increasing or
4605 decreasing in order to choose the right comparison operator. */
4606 if (!cst_and_fits_in_hwi (cand->iv->step))
4607 return false;
4608 step = int_cst_value (cand->iv->step);
4610 /* Check that the number of iterations matches the expected pattern:
4611 a + 1 > b ? 0 : b - a - 1. */
4612 mbz = niter->may_be_zero;
4613 if (TREE_CODE (mbz) == GT_EXPR)
4615 /* Handle a + 1 > b. */
4616 tree op0 = TREE_OPERAND (mbz, 0);
4617 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4619 a = TREE_OPERAND (op0, 0);
4620 b = TREE_OPERAND (mbz, 1);
4622 else
4623 return false;
4625 else if (TREE_CODE (mbz) == LT_EXPR)
4627 tree op1 = TREE_OPERAND (mbz, 1);
4629 /* Handle b < a + 1. */
4630 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4632 a = TREE_OPERAND (op1, 0);
4633 b = TREE_OPERAND (mbz, 0);
4635 else
4636 return false;
4638 else
4639 return false;
4641 /* Expected number of iterations is B - A - 1. Check that it matches
4642 the actual number, i.e., that B - A - NITER = 1. */
4643 tree_to_aff_combination (niter->niter, nit_type, &nit);
4644 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4645 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4646 aff_combination_scale (&nit, -1);
4647 aff_combination_scale (&tmpa, -1);
4648 aff_combination_add (&tmpb, &tmpa);
4649 aff_combination_add (&tmpb, &nit);
4650 if (tmpb.n != 0 || tmpb.offset != 1)
4651 return false;
4653 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4654 overflow. */
4655 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4656 cand->iv->step,
4657 fold_convert (TREE_TYPE (cand->iv->step), a));
4658 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4659 return false;
4661 /* Determine the new comparison operator. */
4662 comp = step < 0 ? GT_EXPR : LT_EXPR;
4663 if (*comp_p == NE_EXPR)
4664 *comp_p = comp;
4665 else if (*comp_p == EQ_EXPR)
4666 *comp_p = invert_tree_comparison (comp, false);
4667 else
4668 gcc_unreachable ();
4670 return true;
4673 /* Check whether it is possible to express the condition in USE by a
4674 comparison with candidate CAND. If so, store the value it is compared
4675 with to BOUND, and the comparison operator to COMP. */
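/* For example (schematic): an exit test "i < 100" on a 32-bit iv can be
   re-expressed on a pointer candidate p with step 4 as "p != p_0 + 400",
   but only because the period check below guarantees that p cannot wrap
   around before the hundredth iteration.  */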
4677 static bool
4678 may_eliminate_iv (struct ivopts_data *data,
4679 struct iv_use *use, struct iv_cand *cand, tree *bound,
4680 enum tree_code *comp)
4682 basic_block ex_bb;
4683 edge exit;
4684 tree period;
4685 struct loop *loop = data->current_loop;
4686 aff_tree bnd;
4687 struct tree_niter_desc *desc = NULL;
4689 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4690 return false;
4692 /* For now works only for exits that dominate the loop latch.
4693 TODO: extend to other conditions inside loop body. */
4694 ex_bb = gimple_bb (use->stmt);
4695 if (use->stmt != last_stmt (ex_bb)
4696 || gimple_code (use->stmt) != GIMPLE_COND
4697 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4698 return false;
4700 exit = EDGE_SUCC (ex_bb, 0);
4701 if (flow_bb_inside_loop_p (loop, exit->dest))
4702 exit = EDGE_SUCC (ex_bb, 1);
4703 if (flow_bb_inside_loop_p (loop, exit->dest))
4704 return false;
4706 desc = niter_for_exit (data, exit);
4707 if (!desc)
4708 return false;
4710 /* Determine whether we can use the variable to test the exit condition.
4711 This is the case iff the period of the induction variable is greater
4712 than the number of iterations for which the exit condition is true. */
4713 period = iv_period (cand->iv);
4715 /* If the number of iterations is constant, compare against it directly. */
4716 if (TREE_CODE (desc->niter) == INTEGER_CST)
4718 /* See cand_value_at. */
4719 if (stmt_after_increment (loop, cand, use->stmt))
4721 if (!tree_int_cst_lt (desc->niter, period))
4722 return false;
4724 else
4726 if (tree_int_cst_lt (period, desc->niter))
4727 return false;
4731 /* If not, and if this is the only possible exit of the loop, see whether
4732 we can get a conservative estimate on the number of iterations of the
4733 entire loop and compare against that instead. */
4734 else
4736 widest_int period_value, max_niter;
4738 max_niter = desc->max;
4739 if (stmt_after_increment (loop, cand, use->stmt))
4740 max_niter += 1;
4741 period_value = wi::to_widest (period);
4742 if (wi::gtu_p (max_niter, period_value))
4744 /* See if we can take advantage of inferred loop bound information. */
4745 if (data->loop_single_exit_p)
4747 if (!max_loop_iterations (loop, &max_niter))
4748 return false;
4749 /* The loop bound is already adjusted by adding 1. */
4750 if (wi::gtu_p (max_niter, period_value))
4751 return false;
4753 else
4754 return false;
4758 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4760 *bound = fold_convert (TREE_TYPE (cand->iv->base),
4761 aff_combination_to_tree (&bnd));
4762 *comp = iv_elimination_compare (data, use);
4764 /* It is unlikely that computing the number of iterations using division
4765 would be more profitable than keeping the original induction variable. */
4766 if (expression_expensive_p (*bound))
4767 return false;
4769 /* Sometimes, the situation that the number of iterations may be zero
4770 unless additional assumptions hold can be handled by using <
4771 instead of != in the exit condition.
4773 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4774 base the exit condition on it. However, that is often too
4775 expensive. */
4776 if (!integer_zerop (desc->may_be_zero))
4777 return iv_elimination_compare_lt (data, cand, comp, desc);
4779 return true;
4782 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4783 be copied, if it is used in the loop body and DATA->body_includes_call. */
4785 static int
4786 parm_decl_cost (struct ivopts_data *data, tree bound)
4788 tree sbound = bound;
4789 STRIP_NOPS (sbound);
4791 if (TREE_CODE (sbound) == SSA_NAME
4792 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4793 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4794 && data->body_includes_call)
4795 return COSTS_N_INSNS (1);
4797 return 0;
4800 /* Determines cost of basing replacement of USE on CAND in a condition. */
4802 static bool
4803 determine_use_iv_cost_condition (struct ivopts_data *data,
4804 struct iv_use *use, struct iv_cand *cand)
4806 tree bound = NULL_TREE;
4807 struct iv *cmp_iv;
4808 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4809 comp_cost elim_cost, express_cost, cost, bound_cost;
4810 bool ok;
4811 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4812 tree *control_var, *bound_cst;
4813 enum tree_code comp = ERROR_MARK;
4815 /* Only consider real candidates. */
4816 if (!cand->iv)
4818 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4819 ERROR_MARK, -1);
4820 return false;
4823 /* Try iv elimination. */
4824 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4826 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4827 if (elim_cost.cost == 0)
4828 elim_cost.cost = parm_decl_cost (data, bound);
4829 else if (TREE_CODE (bound) == INTEGER_CST)
4830 elim_cost.cost = 0;
4831 /* If we replace a loop condition 'i < n' with 'p < base + n',
4832 depends_on_elim will have 'base' and 'n' set, which implies
4833 that both 'base' and 'n' will be live during the loop. More likely,
4834 'base + n' will be loop invariant, resulting in only one live value
4835 during the loop. So in that case we clear depends_on_elim and set
4836 elim_inv_expr_id instead. */
4837 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4839 elim_inv_expr_id = get_expr_id (data, bound);
4840 bitmap_clear (depends_on_elim);
4842 /* The bound is a loop invariant, so it will only be computed
4843 once. */
4844 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4846 else
4847 elim_cost = infinite_cost;
4849 /* Try expressing the original giv. If it is compared with an invariant,
4850 note that we cannot get rid of it. */
4851 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4852 NULL, &cmp_iv);
4853 gcc_assert (ok);
4855 /* When the condition is a comparison of the candidate IV against
4856 zero, prefer this IV.
4858 TODO: The constant that we're subtracting from the cost should
4859 be target-dependent. This information should be added to the
4860 target costs for each backend. */
4861 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4862 && integer_zerop (*bound_cst)
4863 && (operand_equal_p (*control_var, cand->var_after, 0)
4864 || operand_equal_p (*control_var, cand->var_before, 0)))
4865 elim_cost.cost -= 1;
4867 express_cost = get_computation_cost (data, use, cand, false,
4868 &depends_on_express, NULL,
4869 &express_inv_expr_id);
4870 fd_ivopts_data = data;
4871 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4873 /* Count the cost of the original bound as well. */
4874 bound_cost = force_var_cost (data, *bound_cst, NULL);
4875 if (bound_cost.cost == 0)
4876 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4877 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4878 bound_cost.cost = 0;
4879 express_cost.cost += bound_cost.cost;
4881 /* Choose the better approach, preferring the eliminated IV. */
4882 if (compare_costs (elim_cost, express_cost) <= 0)
4884 cost = elim_cost;
4885 depends_on = depends_on_elim;
4886 depends_on_elim = NULL;
4887 inv_expr_id = elim_inv_expr_id;
4889 else
4891 cost = express_cost;
4892 depends_on = depends_on_express;
4893 depends_on_express = NULL;
4894 bound = NULL_TREE;
4895 comp = ERROR_MARK;
4896 inv_expr_id = express_inv_expr_id;
4899 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4901 if (depends_on_elim)
4902 BITMAP_FREE (depends_on_elim);
4903 if (depends_on_express)
4904 BITMAP_FREE (depends_on_express);
4906 return !infinite_cost_p (cost);
4909 /* Determines cost of basing replacement of USE on CAND. Returns false
4910 if USE cannot be based on CAND. */
4912 static bool
4913 determine_use_iv_cost (struct ivopts_data *data,
4914 struct iv_use *use, struct iv_cand *cand)
4916 switch (use->type)
4918 case USE_NONLINEAR_EXPR:
4919 return determine_use_iv_cost_generic (data, use, cand);
4921 case USE_ADDRESS:
4922 return determine_use_iv_cost_address (data, use, cand);
4924 case USE_COMPARE:
4925 return determine_use_iv_cost_condition (data, use, cand);
4927 default:
4928 gcc_unreachable ();
4932 /* Return true if get_computation_cost indicates that autoincrement is
4933 a possibility for the pair of USE and CAND, false otherwise. */
4935 static bool
4936 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4937 struct iv_cand *cand)
4939 bitmap depends_on;
4940 bool can_autoinc;
4941 comp_cost cost;
4943 if (use->type != USE_ADDRESS)
4944 return false;
4946 cost = get_computation_cost (data, use, cand, true, &depends_on,
4947 &can_autoinc, NULL);
4949 BITMAP_FREE (depends_on);
4951 return !infinite_cost_p (cost) && can_autoinc;
4954 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4955 use that allows autoincrement, and set their AINC_USE if possible. */
4957 static void
4958 set_autoinc_for_original_candidates (struct ivopts_data *data)
4960 unsigned i, j;
4962 for (i = 0; i < n_iv_cands (data); i++)
4964 struct iv_cand *cand = iv_cand (data, i);
4965 struct iv_use *closest_before = NULL;
4966 struct iv_use *closest_after = NULL;
4967 if (cand->pos != IP_ORIGINAL)
4968 continue;
4970 for (j = 0; j < n_iv_uses (data); j++)
4972 struct iv_use *use = iv_use (data, j);
4973 unsigned uid = gimple_uid (use->stmt);
4975 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4976 continue;
4978 if (uid < gimple_uid (cand->incremented_at)
4979 && (closest_before == NULL
4980 || uid > gimple_uid (closest_before->stmt)))
4981 closest_before = use;
4983 if (uid > gimple_uid (cand->incremented_at)
4984 && (closest_after == NULL
4985 || uid < gimple_uid (closest_after->stmt)))
4986 closest_after = use;
4989 if (closest_before != NULL
4990 && autoinc_possible_for_pair (data, closest_before, cand))
4991 cand->ainc_use = closest_before;
4992 else if (closest_after != NULL
4993 && autoinc_possible_for_pair (data, closest_after, cand))
4994 cand->ainc_use = closest_after;
4998 /* Finds the candidates for the induction variables. */
5000 static void
5001 find_iv_candidates (struct ivopts_data *data)
5003 /* Add commonly used ivs. */
5004 add_standard_iv_candidates (data);
5006 /* Add old induction variables. */
5007 add_old_ivs_candidates (data);
5009 /* Add induction variables derived from uses. */
5010 add_derived_ivs_candidates (data);
5012 set_autoinc_for_original_candidates (data);
5014 /* Record the important candidates. */
5015 record_important_candidates (data);
5018 /* Determines costs of basing the use of the iv on an iv candidate. */
5020 static void
5021 determine_use_iv_costs (struct ivopts_data *data)
5023 unsigned i, j;
5024 struct iv_use *use;
5025 struct iv_cand *cand;
5026 bitmap to_clear = BITMAP_ALLOC (NULL);
5028 alloc_use_cost_map (data);
5030 for (i = 0; i < n_iv_uses (data); i++)
5032 use = iv_use (data, i);
5034 if (data->consider_all_candidates)
5036 for (j = 0; j < n_iv_cands (data); j++)
5038 cand = iv_cand (data, j);
5039 determine_use_iv_cost (data, use, cand);
5042 else
5044 bitmap_iterator bi;
5046 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5048 cand = iv_cand (data, j);
5049 if (!determine_use_iv_cost (data, use, cand))
5050 bitmap_set_bit (to_clear, j);
5053 /* Remove the candidates for which the cost is infinite from
5054 the list of related candidates. */
5055 bitmap_and_compl_into (use->related_cands, to_clear);
5056 bitmap_clear (to_clear);
5060 BITMAP_FREE (to_clear);
5062 if (dump_file && (dump_flags & TDF_DETAILS))
5064 fprintf (dump_file, "Use-candidate costs:\n");
5066 for (i = 0; i < n_iv_uses (data); i++)
5068 use = iv_use (data, i);
5070 fprintf (dump_file, "Use %d:\n", i);
5071 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5072 for (j = 0; j < use->n_map_members; j++)
5074 if (!use->cost_map[j].cand
5075 || infinite_cost_p (use->cost_map[j].cost))
5076 continue;
5078 fprintf (dump_file, " %d\t%d\t%d\t",
5079 use->cost_map[j].cand->id,
5080 use->cost_map[j].cost.cost,
5081 use->cost_map[j].cost.complexity);
5082 if (use->cost_map[j].depends_on)
5083 bitmap_print (dump_file,
5084 use->cost_map[j].depends_on, "","");
5085 if (use->cost_map[j].inv_expr_id != -1)
5086 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5087 fprintf (dump_file, "\n");
5090 fprintf (dump_file, "\n");
5092 fprintf (dump_file, "\n");
5096 /* Determines cost of the candidate CAND. */
5098 static void
5099 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5101 comp_cost cost_base;
5102 unsigned cost, cost_step;
5103 tree base;
5105 if (!cand->iv)
5107 cand->cost = 0;
5108 return;
5111 /* There are two costs associated with the candidate -- its increment
5112 and its initialization. The second is almost negligible for any loop
5113 that rolls enough, so we give it only very little weight. */
5115 base = cand->iv->base;
5116 cost_base = force_var_cost (data, base, NULL);
5117 /* It will be exceptional that the iv register happens to be initialized with
5118 the proper value at no cost. In general, there will at least be a regcopy
5119 or a const set. */
5120 if (cost_base.cost == 0)
5121 cost_base.cost = COSTS_N_INSNS (1);
5122 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5124 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5126 /* Prefer the original ivs unless we may gain something by replacing it.
5127 The reason is to make debugging simpler; so this is not relevant for
5128 artificial ivs created by other optimization passes. */
5129 if (cand->pos != IP_ORIGINAL
5130 || !SSA_NAME_VAR (cand->var_before)
5131 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5132 cost++;
5134 /* Prefer not to insert statements into the latch unless there are some
5135 already (so that we do not create unnecessary jumps). */
5136 if (cand->pos == IP_END
5137 && empty_block_p (ip_end_pos (data->current_loop)))
5138 cost++;
5140 cand->cost = cost;
5141 cand->cost_step = cost_step;
5144 /* Determines costs of computation of the candidates. */
5146 static void
5147 determine_iv_costs (struct ivopts_data *data)
5149 unsigned i;
5151 if (dump_file && (dump_flags & TDF_DETAILS))
5153 fprintf (dump_file, "Candidate costs:\n");
5154 fprintf (dump_file, " cand\tcost\n");
5157 for (i = 0; i < n_iv_cands (data); i++)
5159 struct iv_cand *cand = iv_cand (data, i);
5161 determine_iv_cost (data, cand);
5163 if (dump_file && (dump_flags & TDF_DETAILS))
5164 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5167 if (dump_file && (dump_flags & TDF_DETAILS))
5168 fprintf (dump_file, "\n");
5171 /* Calculates cost for having SIZE induction variables. */
5173 static unsigned
5174 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5176 /* We add size to the cost, so that we prefer eliminating ivs
5177 if possible. */
5178 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5179 data->body_includes_call);
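/* An illustrative reading (numbers invented): with 10 available registers
   of which 7 are already used by the loop body, asking for 4 ivs exceeds
   the supply, so estimate_reg_pressure_cost starts charging for spills on
   top of the "+ size" bias that steers the search toward fewer ivs.  */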
5182 /* For each size of the induction variable set, determine the penalty. */
5184 static void
5185 determine_set_costs (struct ivopts_data *data)
5187 unsigned j, n;
5188 gimple phi;
5189 gimple_stmt_iterator psi;
5190 tree op;
5191 struct loop *loop = data->current_loop;
5192 bitmap_iterator bi;
5194 if (dump_file && (dump_flags & TDF_DETAILS))
5196 fprintf (dump_file, "Global costs:\n");
5197 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5198 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5199 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5200 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5203 n = 0;
5204 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5206 phi = gsi_stmt (psi);
5207 op = PHI_RESULT (phi);
5209 if (virtual_operand_p (op))
5210 continue;
5212 if (get_iv (data, op))
5213 continue;
5215 n++;
5218 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5220 struct version_info *info = ver_info (data, j);
5222 if (info->inv_id && info->has_nonlin_use)
5223 n++;
5226 data->regs_used = n;
5227 if (dump_file && (dump_flags & TDF_DETAILS))
5228 fprintf (dump_file, " regs_used %d\n", n);
5230 if (dump_file && (dump_flags & TDF_DETAILS))
5232 fprintf (dump_file, " cost for size:\n");
5233 fprintf (dump_file, " ivs\tcost\n");
5234 for (j = 0; j <= 2 * target_avail_regs; j++)
5235 fprintf (dump_file, " %d\t%d\n", j,
5236 ivopts_global_cost_for_size (data, j));
5237 fprintf (dump_file, "\n");
5241 /* Returns true if A is a cheaper cost pair than B. */
5243 static bool
5244 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5246 int cmp;
5248 if (!a)
5249 return false;
5251 if (!b)
5252 return true;
5254 cmp = compare_costs (a->cost, b->cost);
5255 if (cmp < 0)
5256 return true;
5258 if (cmp > 0)
5259 return false;
5261 /* In case the costs are the same, prefer the cheaper candidate. */
5262 if (a->cand->cost < b->cand->cost)
5263 return true;
5265 return false;
5269 /* Returns the candidate by which USE is expressed in IVS. */
5271 static struct cost_pair *
5272 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5274 return ivs->cand_for_use[use->id];
5277 /* Computes the cost field of IVS structure. */
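/* Schematically,

     cost (IVS) = sum of use costs + sum of candidate costs
                  + global_cost_for_size (#regs + #named invariant exprs)

   so the ivs themselves, the invariants they depend on and the named
   loop-invariant expressions all feed the register-pressure term.  */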
5279 static void
5280 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5282 comp_cost cost = ivs->cand_use_cost;
5284 cost.cost += ivs->cand_cost;
5286 cost.cost += ivopts_global_cost_for_size (data,
5287 ivs->n_regs + ivs->num_used_inv_expr);
5289 ivs->cost = cost;
5292 /* Remove invariants in set INVS from set IVS. */
5294 static void
5295 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5297 bitmap_iterator bi;
5298 unsigned iid;
5300 if (!invs)
5301 return;
5303 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5305 ivs->n_invariant_uses[iid]--;
5306 if (ivs->n_invariant_uses[iid] == 0)
5307 ivs->n_regs--;
5311 /* Set USE not to be expressed by any candidate in IVS. */
5313 static void
5314 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5315 struct iv_use *use)
5317 unsigned uid = use->id, cid;
5318 struct cost_pair *cp;
5320 cp = ivs->cand_for_use[uid];
5321 if (!cp)
5322 return;
5323 cid = cp->cand->id;
5325 ivs->bad_uses++;
5326 ivs->cand_for_use[uid] = NULL;
5327 ivs->n_cand_uses[cid]--;
5329 if (ivs->n_cand_uses[cid] == 0)
5331 bitmap_clear_bit (ivs->cands, cid);
5332 /* Do not count the pseudocandidates. */
5333 if (cp->cand->iv)
5334 ivs->n_regs--;
5335 ivs->n_cands--;
5336 ivs->cand_cost -= cp->cand->cost;
5338 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5341 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5343 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5345 if (cp->inv_expr_id != -1)
5347 ivs->used_inv_expr[cp->inv_expr_id]--;
5348 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5349 ivs->num_used_inv_expr--;
5351 iv_ca_recount_cost (data, ivs);
5354 /* Add invariants in set INVS to set IVS. */
5356 static void
5357 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5359 bitmap_iterator bi;
5360 unsigned iid;
5362 if (!invs)
5363 return;
5365 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5367 ivs->n_invariant_uses[iid]++;
5368 if (ivs->n_invariant_uses[iid] == 1)
5369 ivs->n_regs++;
5373 /* Set cost pair for USE in set IVS to CP. */
5375 static void
5376 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5377 struct iv_use *use, struct cost_pair *cp)
5379 unsigned uid = use->id, cid;
5381 if (ivs->cand_for_use[uid] == cp)
5382 return;
5384 if (ivs->cand_for_use[uid])
5385 iv_ca_set_no_cp (data, ivs, use);
5387 if (cp)
5389 cid = cp->cand->id;
5391 ivs->bad_uses--;
5392 ivs->cand_for_use[uid] = cp;
5393 ivs->n_cand_uses[cid]++;
5394 if (ivs->n_cand_uses[cid] == 1)
5396 bitmap_set_bit (ivs->cands, cid);
5397 /* Do not count the pseudocandidates. */
5398 if (cp->cand->iv)
5399 ivs->n_regs++;
5400 ivs->n_cands++;
5401 ivs->cand_cost += cp->cand->cost;
5403 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5406 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5407 iv_ca_set_add_invariants (ivs, cp->depends_on);
5409 if (cp->inv_expr_id != -1)
5411 ivs->used_inv_expr[cp->inv_expr_id]++;
5412 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5413 ivs->num_used_inv_expr++;
5415 iv_ca_recount_cost (data, ivs);
5419 /* Extend set IVS by expressing USE by some of the candidates in it
5420 if possible. Consider all important candidates if the candidates in
5421 set IVS don't give any result. */
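/* E.g., if IVS currently contains {cand2, cand5}, USE is first matched
   against those two; only when neither expresses it at finite cost are
   the important candidates outside the set consulted as well.  */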
5423 static void
5424 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5425 struct iv_use *use)
5427 struct cost_pair *best_cp = NULL, *cp;
5428 bitmap_iterator bi;
5429 unsigned i;
5430 struct iv_cand *cand;
5432 gcc_assert (ivs->upto >= use->id);
5433 ivs->upto++;
5434 ivs->bad_uses++;
5436 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5438 cand = iv_cand (data, i);
5439 cp = get_use_iv_cost (data, use, cand);
5440 if (cheaper_cost_pair (cp, best_cp))
5441 best_cp = cp;
5444 if (best_cp == NULL)
5446 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5448 cand = iv_cand (data, i);
5449 cp = get_use_iv_cost (data, use, cand);
5450 if (cheaper_cost_pair (cp, best_cp))
5451 best_cp = cp;
5455 iv_ca_set_cp (data, ivs, use, best_cp);
5458 /* Get cost for assignment IVS. */
5460 static comp_cost
5461 iv_ca_cost (struct iv_ca *ivs)
5463 /* This was a conditional expression but it triggered a bug in
5464 Sun C 5.5. */
5465 if (ivs->bad_uses)
5466 return infinite_cost;
5467 else
5468 return ivs->cost;
5471 /* Returns true if all dependences of CP are among invariants in IVS. */
5473 static bool
5474 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5476 unsigned i;
5477 bitmap_iterator bi;
5479 if (!cp->depends_on)
5480 return true;
5482 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5484 if (ivs->n_invariant_uses[i] == 0)
5485 return false;
5488 return true;
5491 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5492 it before NEXT_CHANGE. */
5494 static struct iv_ca_delta *
5495 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5496 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5498 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5500 change->use = use;
5501 change->old_cp = old_cp;
5502 change->new_cp = new_cp;
5503 change->next_change = next_change;
5505 return change;
5508 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5509 are rewritten. */
5511 static struct iv_ca_delta *
5512 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5514 struct iv_ca_delta *last;
5516 if (!l2)
5517 return l1;
5519 if (!l1)
5520 return l2;
5522 for (last = l1; last->next_change; last = last->next_change)
5523 continue;
5524 last->next_change = l2;
5526 return l1;
5529 /* Reverse the list of changes DELTA, forming the inverse to it. */
5531 static struct iv_ca_delta *
5532 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5534 struct iv_ca_delta *act, *next, *prev = NULL;
5535 struct cost_pair *tmp;
5537 for (act = delta; act; act = next)
5539 next = act->next_change;
5540 act->next_change = prev;
5541 prev = act;
5543 tmp = act->old_cp;
5544 act->old_cp = act->new_cp;
5545 act->new_cp = tmp;
5548 return prev;
5551 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5552 reverted instead. */
5554 static void
5555 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5556 struct iv_ca_delta *delta, bool forward)
5558 struct cost_pair *from, *to;
5559 struct iv_ca_delta *act;
5561 if (!forward)
5562 delta = iv_ca_delta_reverse (delta);
5564 for (act = delta; act; act = act->next_change)
5566 from = act->old_cp;
5567 to = act->new_cp;
5568 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5569 iv_ca_set_cp (data, ivs, act->use, to);
5572 if (!forward)
5573 iv_ca_delta_reverse (delta);
5576 /* Returns true if CAND is used in IVS. */
5578 static bool
5579 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5581 return ivs->n_cand_uses[cand->id] > 0;
5584 /* Returns number of induction variable candidates in the set IVS. */
5586 static unsigned
5587 iv_ca_n_cands (struct iv_ca *ivs)
5589 return ivs->n_cands;
5592 /* Free the list of changes DELTA. */
5594 static void
5595 iv_ca_delta_free (struct iv_ca_delta **delta)
5597 struct iv_ca_delta *act, *next;
5599 for (act = *delta; act; act = next)
5601 next = act->next_change;
5602 free (act);
5605 *delta = NULL;
5608 /* Allocates new iv candidates assignment. */
5610 static struct iv_ca *
5611 iv_ca_new (struct ivopts_data *data)
5613 struct iv_ca *nw = XNEW (struct iv_ca);
5615 nw->upto = 0;
5616 nw->bad_uses = 0;
5617 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5618 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5619 nw->cands = BITMAP_ALLOC (NULL);
5620 nw->n_cands = 0;
5621 nw->n_regs = 0;
5622 nw->cand_use_cost = no_cost;
5623 nw->cand_cost = 0;
5624 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5625 nw->cost = no_cost;
5626 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5627 nw->num_used_inv_expr = 0;
5629 return nw;
5632 /* Free memory occupied by the set IVS. */
5634 static void
5635 iv_ca_free (struct iv_ca **ivs)
5637 free ((*ivs)->cand_for_use);
5638 free ((*ivs)->n_cand_uses);
5639 BITMAP_FREE ((*ivs)->cands);
5640 free ((*ivs)->n_invariant_uses);
5641 free ((*ivs)->used_inv_expr);
5642 free (*ivs);
5643 *ivs = NULL;
5646 /* Dumps IVS to FILE. */
5648 static void
5649 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5651 const char *pref = " invariants ";
5652 unsigned i;
5653 comp_cost cost = iv_ca_cost (ivs);
5655 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5656 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5657 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5658 bitmap_print (file, ivs->cands, " candidates: ","\n");
5660 for (i = 0; i < ivs->upto; i++)
5662 struct iv_use *use = iv_use (data, i);
5663 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5664 if (cp)
5665 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5666 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5667 else
5668 fprintf (file, " use:%d --> ??\n", use->id);
5671 for (i = 1; i <= data->max_inv_id; i++)
5672 if (ivs->n_invariant_uses[i])
5674 fprintf (file, "%s%d", pref, i);
5675 pref = ", ";
5677 fprintf (file, "\n\n");
5680 /* Try changing the candidate in IVS to CAND for each use. Return the cost of
5681 the new set, and store the differences in DELTA. The number of induction
5682 variables in the new set is stored to N_IVS. MIN_NCAND is a flag; when true,
5683 the function tries to find a solution with a minimal number of iv candidates. */
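/* As an illustration: if IVS maps uses u0 and u1 to candidate A, extending
   by candidate B records a delta only for the uses where B is applicable
   (and, unless MIN_NCAND, cheaper). The deltas are committed to measure
   the new cost and then reverted, so IVS itself is left unmodified.  */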
5685 static comp_cost
5686 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5687 struct iv_cand *cand, struct iv_ca_delta **delta,
5688 unsigned *n_ivs, bool min_ncand)
5690 unsigned i;
5691 comp_cost cost;
5692 struct iv_use *use;
5693 struct cost_pair *old_cp, *new_cp;
5695 *delta = NULL;
5696 for (i = 0; i < ivs->upto; i++)
5698 use = iv_use (data, i);
5699 old_cp = iv_ca_cand_for_use (ivs, use);
5701 if (old_cp
5702 && old_cp->cand == cand)
5703 continue;
5705 new_cp = get_use_iv_cost (data, use, cand);
5706 if (!new_cp)
5707 continue;
5709 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5710 continue;
5712 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5713 continue;
5715 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5718 iv_ca_delta_commit (data, ivs, *delta, true);
5719 cost = iv_ca_cost (ivs);
5720 if (n_ivs)
5721 *n_ivs = iv_ca_n_cands (ivs);
5722 iv_ca_delta_commit (data, ivs, *delta, false);
5724 return cost;
5727 /* Try narrowing set IVS by removing CAND. Return the cost of
5728 the new set and store the differences in DELTA. START is
5729 the candidate with which we start narrowing. */
5731 static comp_cost
5732 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5733 struct iv_cand *cand, struct iv_cand *start,
5734 struct iv_ca_delta **delta)
5736 unsigned i, ci;
5737 struct iv_use *use;
5738 struct cost_pair *old_cp, *new_cp, *cp;
5739 bitmap_iterator bi;
5740 struct iv_cand *cnd;
5741 comp_cost cost, best_cost, acost;
5743 *delta = NULL;
5744 for (i = 0; i < n_iv_uses (data); i++)
5746 use = iv_use (data, i);
5748 old_cp = iv_ca_cand_for_use (ivs, use);
5749 if (old_cp->cand != cand)
5750 continue;
5752 best_cost = iv_ca_cost (ivs);
5753 /* Start narrowing with START. */
5754 new_cp = get_use_iv_cost (data, use, start);
5756 if (data->consider_all_candidates)
5758 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5760 if (ci == cand->id || (start && ci == start->id))
5761 continue;
5763 cnd = iv_cand (data, ci);
5765 cp = get_use_iv_cost (data, use, cnd);
5766 if (!cp)
5767 continue;
5769 iv_ca_set_cp (data, ivs, use, cp);
5770 acost = iv_ca_cost (ivs);
5772 if (compare_costs (acost, best_cost) < 0)
5774 best_cost = acost;
5775 new_cp = cp;
5779 else
5781 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5783 if (ci == cand->id || (start && ci == start->id))
5784 continue;
5786 cnd = iv_cand (data, ci);
5788 cp = get_use_iv_cost (data, use, cnd);
5789 if (!cp)
5790 continue;
5792 iv_ca_set_cp (data, ivs, use, cp);
5793 acost = iv_ca_cost (ivs);
5795 if (compare_costs (acost, best_cost) < 0)
5797 best_cost = acost;
5798 new_cp = cp;
5802 /* Restore to old cp for use. */
5803 iv_ca_set_cp (data, ivs, use, old_cp);
5805 if (!new_cp)
5807 iv_ca_delta_free (delta);
5808 return infinite_cost;
5811 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5814 iv_ca_delta_commit (data, ivs, *delta, true);
5815 cost = iv_ca_cost (ivs);
5816 iv_ca_delta_commit (data, ivs, *delta, false);
5818 return cost;
5821 /* Try optimizing the set of candidates IVS by removing candidates other
5822 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5823 differences in DELTA. */
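/* Note that removals compound: when dropping some candidate lowers the
   cost, the function recurses on the reduced set, so a chain of ivs that
   became redundant together can disappear in a single pruning pass.  */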
5825 static comp_cost
5826 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5827 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5829 bitmap_iterator bi;
5830 struct iv_ca_delta *act_delta, *best_delta;
5831 unsigned i;
5832 comp_cost best_cost, acost;
5833 struct iv_cand *cand;
5835 best_delta = NULL;
5836 best_cost = iv_ca_cost (ivs);
5838 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5840 cand = iv_cand (data, i);
5842 if (cand == except_cand)
5843 continue;
5845 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5847 if (compare_costs (acost, best_cost) < 0)
5849 best_cost = acost;
5850 iv_ca_delta_free (&best_delta);
5851 best_delta = act_delta;
5853 else
5854 iv_ca_delta_free (&act_delta);
5857 if (!best_delta)
5859 *delta = NULL;
5860 return best_cost;
5863 /* Recurse to possibly remove other unnecessary ivs. */
5864 iv_ca_delta_commit (data, ivs, best_delta, true);
5865 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5866 iv_ca_delta_commit (data, ivs, best_delta, false);
5867 *delta = iv_ca_delta_join (best_delta, *delta);
5868 return best_cost;
5871 /* Tries to extend the set IVS in the best possible way in order
5872 to express USE. If ORIGINALP is true, prefer candidates from
5873 the original set of IVs, otherwise favor important candidates not
5874 based on any memory object. */
5876 static bool
5877 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5878 struct iv_use *use, bool originalp)
5880 comp_cost best_cost, act_cost;
5881 unsigned i;
5882 bitmap_iterator bi;
5883 struct iv_cand *cand;
5884 struct iv_ca_delta *best_delta = NULL, *act_delta;
5885 struct cost_pair *cp;
5887 iv_ca_add_use (data, ivs, use);
5888 best_cost = iv_ca_cost (ivs);
5889 cp = iv_ca_cand_for_use (ivs, use);
5890 if (cp)
5892 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5893 iv_ca_set_no_cp (data, ivs, use);
5896 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5897 first try important candidates not based on any memory object. Only if
5898 this fails, try the specific ones. Rationale -- in loops with many
5899 variables the best choice often is to use just one generic biv. If we
5900 added here many ivs specific to the uses, the optimization algorithm later
5901 would be likely to get stuck in a local minimum, thus causing us to create
5902 too many ivs. The approach from few ivs to more seems more likely to be
5903 successful -- starting from few ivs, replacing an expensive use by a
5904 specific iv should always be a win. */
5905 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5907 cand = iv_cand (data, i);
5909 if (originalp && cand->pos != IP_ORIGINAL)
5910 continue;
5912 if (!originalp && cand->iv->base_object != NULL_TREE)
5913 continue;
5915 if (iv_ca_cand_used_p (ivs, cand))
5916 continue;
5918 cp = get_use_iv_cost (data, use, cand);
5919 if (!cp)
5920 continue;
5922 iv_ca_set_cp (data, ivs, use, cp);
5923 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5924 true);
5925 iv_ca_set_no_cp (data, ivs, use);
5926 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5928 if (compare_costs (act_cost, best_cost) < 0)
5930 best_cost = act_cost;
5932 iv_ca_delta_free (&best_delta);
5933 best_delta = act_delta;
5935 else
5936 iv_ca_delta_free (&act_delta);
5939 if (infinite_cost_p (best_cost))
5941 for (i = 0; i < use->n_map_members; i++)
5943 cp = use->cost_map + i;
5944 cand = cp->cand;
5945 if (!cand)
5946 continue;
5948 /* Already tried this. */
5949 if (cand->important)
5951 if (originalp && cand->pos == IP_ORIGINAL)
5952 continue;
5953 if (!originalp && cand->iv->base_object == NULL_TREE)
5954 continue;
5957 if (iv_ca_cand_used_p (ivs, cand))
5958 continue;
5960 act_delta = NULL;
5961 iv_ca_set_cp (data, ivs, use, cp);
5962 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5963 iv_ca_set_no_cp (data, ivs, use);
5964 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5965 cp, act_delta);
5967 if (compare_costs (act_cost, best_cost) < 0)
5969 best_cost = act_cost;
5971 if (best_delta)
5972 iv_ca_delta_free (&best_delta);
5973 best_delta = act_delta;
5975 else
5976 iv_ca_delta_free (&act_delta);
5980 iv_ca_delta_commit (data, ivs, best_delta, true);
5981 iv_ca_delta_free (&best_delta);
5983 return !infinite_cost_p (best_cost);
5986 /* Finds an initial assignment of candidates to uses. */
5988 static struct iv_ca *
5989 get_initial_solution (struct ivopts_data *data, bool originalp)
5991 struct iv_ca *ivs = iv_ca_new (data);
5992 unsigned i;
5994 for (i = 0; i < n_iv_uses (data); i++)
5995 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5997 iv_ca_free (&ivs);
5998 return NULL;
6001 return ivs;
6004 /* Tries to improve set of induction variables IVS. */
6006 static bool
6007 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
6009 unsigned i, n_ivs;
6010 comp_cost acost, best_cost = iv_ca_cost (ivs);
6011 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6012 struct iv_cand *cand;
6014 /* Try extending the set of induction variables by one. */
6015 for (i = 0; i < n_iv_cands (data); i++)
6017 cand = iv_cand (data, i);
6019 if (iv_ca_cand_used_p (ivs, cand))
6020 continue;
6022 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6023 if (!act_delta)
6024 continue;
6026 /* If we successfully added the candidate and the set is small enough,
6027 try optimizing it by removing other candidates. */
6028 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6030 iv_ca_delta_commit (data, ivs, act_delta, true);
6031 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6032 iv_ca_delta_commit (data, ivs, act_delta, false);
6033 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6036 if (compare_costs (acost, best_cost) < 0)
6038 best_cost = acost;
6039 iv_ca_delta_free (&best_delta);
6040 best_delta = act_delta;
6042 else
6043 iv_ca_delta_free (&act_delta);
6046 if (!best_delta)
6048 /* Try removing the candidates from the set instead. */
6049 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6051 /* Nothing more we can do. */
6052 if (!best_delta)
6053 return false;
6056 iv_ca_delta_commit (data, ivs, best_delta, true);
6057 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6058 iv_ca_delta_free (&best_delta);
6059 return true;
6062 /* Attempts to find the optimal set of induction variables. We use a simple
6063 greedy heuristic -- we try to replace at most one candidate in the selected
6064 solution and remove the unused ivs while this improves the cost. */
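/* Schematically (a sketch, not the literal control flow):

     set = initial solution;
     while (some change improves the cost)
       apply the best such change, where a change adds one candidate
       and possibly prunes others;

   i.e. a local search that terminates in a local minimum of the cost
   function.  */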
6066 static struct iv_ca *
6067 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6069 struct iv_ca *set;
6071 /* Get the initial solution. */
6072 set = get_initial_solution (data, originalp);
6073 if (!set)
6075 if (dump_file && (dump_flags & TDF_DETAILS))
6076 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6077 return NULL;
6080 if (dump_file && (dump_flags & TDF_DETAILS))
6082 fprintf (dump_file, "Initial set of candidates:\n");
6083 iv_ca_dump (data, dump_file, set);
6086 while (try_improve_iv_set (data, set))
6088 if (dump_file && (dump_flags & TDF_DETAILS))
6090 fprintf (dump_file, "Improved to:\n");
6091 iv_ca_dump (data, dump_file, set);
6095 return set;
6098 static struct iv_ca *
6099 find_optimal_iv_set (struct ivopts_data *data)
6101 unsigned i;
6102 struct iv_ca *set, *origset;
6103 struct iv_use *use;
6104 comp_cost cost, origcost;
6106 /* Determine the cost based on a strategy that starts with the original IVs,
6107 and try again using a strategy that prefers candidates not based
6108 on the original IVs. */
6109 origset = find_optimal_iv_set_1 (data, true);
6110 set = find_optimal_iv_set_1 (data, false);
6112 if (!origset && !set)
6113 return NULL;
6115 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6116 cost = set ? iv_ca_cost (set) : infinite_cost;
6118 if (dump_file && (dump_flags & TDF_DETAILS))
6120 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6121 origcost.cost, origcost.complexity);
6122 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6123 cost.cost, cost.complexity);
6126 /* Choose the one with the best cost. */
6127 if (compare_costs (origcost, cost) <= 0)
6129 if (set)
6130 iv_ca_free (&set);
6131 set = origset;
6133 else if (origset)
6134 iv_ca_free (&origset);
6136 for (i = 0; i < n_iv_uses (data); i++)
6138 use = iv_use (data, i);
6139 use->selected = iv_ca_cand_for_use (set, use)->cand;
6142 return set;
6145 /* Creates a new induction variable corresponding to CAND. */
6147 static void
6148 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6150 gimple_stmt_iterator incr_pos;
6151 tree base;
6152 bool after = false;
6154 if (!cand->iv)
6155 return;
6157 switch (cand->pos)
6159 case IP_NORMAL:
6160 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6161 break;
6163 case IP_END:
6164 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6165 after = true;
6166 break;
6168 case IP_AFTER_USE:
6169 after = true;
6170 /* fall through */
6171 case IP_BEFORE_USE:
6172 incr_pos = gsi_for_stmt (cand->incremented_at);
6173 break;
6175 case IP_ORIGINAL:
6176 /* Mark that the iv is preserved. */
6177 name_info (data, cand->var_before)->preserve_biv = true;
6178 name_info (data, cand->var_after)->preserve_biv = true;
6180 /* Rewrite the increment so that it uses var_before directly. */
6181 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6182 return;
6185 gimple_add_tmp_var (cand->var_before);
6187 base = unshare_expr (cand->iv->base);
6189 create_iv (base, unshare_expr (cand->iv->step),
6190 cand->var_before, data->current_loop,
6191 &incr_pos, after, &cand->var_before, &cand->var_after);
6194 /* Creates new induction variables described in SET. */
6196 static void
6197 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6199 unsigned i;
6200 struct iv_cand *cand;
6201 bitmap_iterator bi;
6203 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6205 cand = iv_cand (data, i);
6206 create_new_iv (data, cand);
6209 if (dump_file && (dump_flags & TDF_DETAILS))
6211 fprintf (dump_file, "\nSelected IV set: \n");
6212 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6214 cand = iv_cand (data, i);
6215 dump_cand (dump_file, cand);
6217 fprintf (dump_file, "\n");
6221 /* Rewrites USE (the definition of an iv used in a nonlinear expression)
6222 using candidate CAND. */
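/* For example (schematic): a use "k_1 = i_2 * 4" based on a candidate with
   base 0 and step 4 can be rewritten as a copy from the candidate variable,
   whereas a PHI use is replaced by a plain assignment inserted after the
   labels of its block, and the PHI node is then removed.  */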
6224 static void
6225 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6226 struct iv_use *use, struct iv_cand *cand)
6228 tree comp;
6229 tree op, tgt;
6230 gimple ass;
6231 gimple_stmt_iterator bsi;
6233 /* An important special case -- if we are asked to express value of
6234 the original iv by itself, just exit; there is no need to
6235 introduce a new computation (that might also need casting the
6236 variable to unsigned and back). */
6237 if (cand->pos == IP_ORIGINAL
6238 && cand->incremented_at == use->stmt)
6240 enum tree_code stmt_code;
6242 gcc_assert (is_gimple_assign (use->stmt));
6243 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6245 /* Check whether we may leave the computation unchanged.
6246 This is the case only if it does not rely on other
6247 computations in the loop -- otherwise, the computation
6248 we rely upon may be removed in remove_unused_ivs,
6249 thus leading to ICE. */
6250 stmt_code = gimple_assign_rhs_code (use->stmt);
6251 if (stmt_code == PLUS_EXPR
6252 || stmt_code == MINUS_EXPR
6253 || stmt_code == POINTER_PLUS_EXPR)
6255 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6256 op = gimple_assign_rhs2 (use->stmt);
6257 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6258 op = gimple_assign_rhs1 (use->stmt);
6259 else
6260 op = NULL_TREE;
6262 else
6263 op = NULL_TREE;
6265 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6266 return;
6269 comp = get_computation (data->current_loop, use, cand);
6270 gcc_assert (comp != NULL_TREE);
6272 switch (gimple_code (use->stmt))
6274 case GIMPLE_PHI:
6275 tgt = PHI_RESULT (use->stmt);
6277 /* If we should keep the biv, do not replace it. */
6278 if (name_info (data, tgt)->preserve_biv)
6279 return;
6281 bsi = gsi_after_labels (gimple_bb (use->stmt));
6282 break;
6284 case GIMPLE_ASSIGN:
6285 tgt = gimple_assign_lhs (use->stmt);
6286 bsi = gsi_for_stmt (use->stmt);
6287 break;
6289 default:
6290 gcc_unreachable ();
6293 if (!valid_gimple_rhs_p (comp)
6294 || (gimple_code (use->stmt) != GIMPLE_PHI
6295 /* We can't allow re-allocating the stmt as it might be pointed
6296 to still. */
6297 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6298 >= gimple_num_ops (gsi_stmt (bsi)))))
6300 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6301 true, GSI_SAME_STMT);
6302 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6304 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6305 /* As this isn't a plain copy we have to reset alignment
6306 information. */
6307 if (SSA_NAME_PTR_INFO (comp))
6308 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6312 if (gimple_code (use->stmt) == GIMPLE_PHI)
6314 ass = gimple_build_assign (tgt, comp);
6315 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6317 bsi = gsi_for_stmt (use->stmt);
6318 remove_phi_node (&bsi, false);
6320 else
6322 gimple_assign_set_rhs_from_tree (&bsi, comp);
6323 use->stmt = gsi_stmt (bsi);
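
/* For illustration only: the IP_ORIGINAL early exit above covers the case
   where the use is the original increment itself, e.g.

     i_next = i + 1;    (use->stmt == cand->incremented_at)

   with the non-iv operand loop invariant -- the statement already computes
   the candidate's value, so rewriting it would only introduce a useless
   copy, possibly with casts to an unsigned type and back.  In every other
   case the used iv value is re-expressed in terms of the selected
   candidate via get_computation and substituted into the PHI node or
   assignment.  */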

/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref
   uses the iv value before the update, so the reordering transformation
   requires adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;

   Directly propagating t over to (1) would introduce an overlapping live
   range and thus increase register pressure.  This peephole transforms the
   code into:

   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;  */
static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statements for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
			reference_alias_ptr_type (*use->op_p),
			iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
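
/* For illustration only: given a use such as a[i] and a candidate ivtmp
   stepping by the element size, the code above rebuilds the reference
   roughly as

     a[i]   -->   MEM[base: &a, index: ivtmp]

   (a TARGET_MEM_REF).  create_mem_ref chooses the concrete
   base/index/step/offset split that the target's addressing modes can
   encode; base_hint merely tells it which operand, if any, is the pointer
   to the underlying object.  */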

/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Replacing exit test: ");
	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
	}
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
				     true, GSI_SAME_STMT);
}
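
/* For illustration only: when iv elimination succeeds (cp->value is
   non-NULL), an exit test such as

     if (i_1 < n_2)

   becomes a comparison of the candidate against a bound computed once in
   the preheader, e.g.

     if (ivtmp_3 != bound_4)

   with the comparison code taken from cp->comp.  The fallback path (when
   no bound was recorded) merely re-expresses the compared iv operand in
   terms of the candidate.  The SSA names here are made up for the
   example.  */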

/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	{
	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

	  tree def = info->iv->ssa_name;

	  if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
	    {
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      gimple stmt;
	      int count = 0;

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  /* We just want to determine whether to do nothing
		     (count == 0), to substitute the computed
		     expression into a single use of the SSA DEF by
		     itself (count == 1), or to use a debug temp
		     because the SSA DEF is used multiple times or as
		     part of a larger expression (count > 1).  */
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      if (!count)
		continue;

	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;
	      for (i = 0; i < n_iv_uses (data) && i < 64; i++)
		{
		  cand = iv_use (data, i)->selected;
		  if (cand == best_cand)
		    continue;
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
		    ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		    == TYPE_MODE (TREE_TYPE (info->iv->base))
		    ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		    ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      best_cand = cand;
		      best_pref = cand_pref;
		    }
		}

	      if (!best_cand)
		continue;

	      tree comp = get_computation_at (data->current_loop,
					      &dummy_use, best_cand,
					      SSA_NAME_DEF_STMT (def));
	      if (!comp)
		continue;

	      if (count > 1)
		{
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
		  else
		    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
		  gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
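
/* For illustration only: the debug handling above keeps -g information
   alive when a user-visible iv is removed.  A binding such as

     # DEBUG i => i_7

   whose SSA name i_7 is about to be released is rewritten to recompute the
   value from the best surviving candidate, e.g.

     # DEBUG i => ivtmp_3 - 1

   going through a DEBUG_EXPR_DECL temporary when the name occurs in
   several bindings or inside a larger expression (the count > 1 case).
   The names and the offset are made up for the example.  */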

/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for hash_map::traverse.  */

bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();
  data->inv_expr_id = 0;
}

/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
}

/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}
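
/* For illustration only: the predicate above deliberately ignores calls
   that are expected to expand to at most a few instructions -- e.g. a
   __builtin_expect in the body should not make the loop count as
   containing a call -- because the result only feeds the
   register-pressure estimate, where a real call matters since it clobbers
   the call-used registers.  */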

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name, determine whether it behaves as an induction
     variable in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
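
/* For illustration only: on a simple loop the net effect of the steps
   above is classic strength reduction.  Assuming 4-byte elements and that
   the cost model picks a pointer candidate, something like

     for (i = 0; i < n; i++)
       sum += a[i];

   typically ends up, at the gimple level, roughly as

     p = &a[0];
     end = &a[n];
     while (p != end)
       {
	 sum += *p;
	 p = p + 4;
       }

   with the counter i removed entirely once nothing else uses it.  */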

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}