Concretize gimple_cond_set_code
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
/* Induction variable optimizations.
   Copyright (C) 2003-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variables value to
         be usable for the given purpose (adding base and offset for arrays,
         etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
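/* As a simplified illustration (not taken from this pass's actual output),
   the strength reduction performed here can turn an indexed store

     for (i = 0; i < n; i++)
       a[i] = 0;

   into a form that advances a pointer induction variable instead,

     for (p = &a[0]; p < &a[n]; p++)
       *p = 0;

   replacing the per-iteration address computation base + i * size by a
   single cheap pointer increment.  */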
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "hash-map.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"
#include "tree-ssa-address.h"
#include "builtins.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs wrt the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
         names that appear in phi nodes on abnormal edges, so that we do not
         create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
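/* For instance (an illustrative sketch, not an exhaustive description): for
   EXPR = &a[i] + 4, the POINTER_PLUS_EXPR and ADDR_EXPR cases combine so
   that the function returns the address of the array A converted to
   ptr_type_node, identifying A as the memory object the iv walks over.  */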
/* Return true if address expression with non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
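/* For example (illustrative only): &a[0] is "complex" in this sense, since
   the ADDR_EXPR wraps an ARRAY_REF rather than a bare declaration, whereas
   &a is not.  alloc_iv below relies on this to lower such bases, so that
   &a[0] and &a do not yield duplicate candidates.  */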
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
          forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple_phi phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple_phi phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_phi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple_phi phi;
  gimple def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_phi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv peeled from another one as a biv.  */
      if (def
          && gimple_code (def) == GIMPLE_PHI
          && gimple_bb (def) == loop->header)
        continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
        {
          if (flow_bb_inside_loop_p (loop, def_bb))
            return NULL;
          return superloop_at_depth (loop,
                                     loop_depth (def_bb->loop_father) + 1);
        }

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
        continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
        return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */
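/* For example (illustrative only): for TOP = 4 * i + 8 * i and BOT = i,
   each addend is a constant multiple of BOT, so 12 is stored to *MUL;
   for TOP = i + 1 and BOT = i the function returns false.  */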
static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
        return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
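/* The last check above guards against steps that can break alignment over
   iterations: for instance (illustrative only), a reference requiring 32-bit
   alignment accessed with a step of 2 bytes has tree_ctz (step) == 1, and
   (1U << 1) * BITS_PER_UNIT == 16 < 32, so the access may become misaligned
   in a later iteration even if the first access is aligned.  */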
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt,
                               tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple_phi phi;
  gimple_phi_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id,
                       info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}
2067 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2068 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2069 we are at the top-level of the processed address. */
2071 static tree
2072 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2073 HOST_WIDE_INT *offset)
2075 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2076 enum tree_code code;
2077 tree type, orig_type = TREE_TYPE (expr);
2078 HOST_WIDE_INT off0, off1, st;
2079 tree orig_expr = expr;
2081 STRIP_NOPS (expr);
2083 type = TREE_TYPE (expr);
2084 code = TREE_CODE (expr);
2085 *offset = 0;
2087 switch (code)
2089 case INTEGER_CST:
2090 if (!cst_and_fits_in_hwi (expr)
2091 || integer_zerop (expr))
2092 return orig_expr;
2094 *offset = int_cst_value (expr);
2095 return build_int_cst (orig_type, 0);
2097 case POINTER_PLUS_EXPR:
2098 case PLUS_EXPR:
2099 case MINUS_EXPR:
2100 op0 = TREE_OPERAND (expr, 0);
2101 op1 = TREE_OPERAND (expr, 1);
2103 op0 = strip_offset_1 (op0, false, false, &off0);
2104 op1 = strip_offset_1 (op1, false, false, &off1);
2106 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2107 if (op0 == TREE_OPERAND (expr, 0)
2108 && op1 == TREE_OPERAND (expr, 1))
2109 return orig_expr;
2111 if (integer_zerop (op1))
2112 expr = op0;
2113 else if (integer_zerop (op0))
2115 if (code == MINUS_EXPR)
2116 expr = fold_build1 (NEGATE_EXPR, type, op1);
2117 else
2118 expr = op1;
2120 else
2121 expr = fold_build2 (code, type, op0, op1);
2123 return fold_convert (orig_type, expr);
2125 case MULT_EXPR:
2126 op1 = TREE_OPERAND (expr, 1);
2127 if (!cst_and_fits_in_hwi (op1))
2128 return orig_expr;
2130 op0 = TREE_OPERAND (expr, 0);
2131 op0 = strip_offset_1 (op0, false, false, &off0);
2132 if (op0 == TREE_OPERAND (expr, 0))
2133 return orig_expr;
2135 *offset = off0 * int_cst_value (op1);
2136 if (integer_zerop (op0))
2137 expr = op0;
2138 else
2139 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2141 return fold_convert (orig_type, expr);
2143 case ARRAY_REF:
2144 case ARRAY_RANGE_REF:
2145 if (!inside_addr)
2146 return orig_expr;
2148 step = array_ref_element_size (expr);
2149 if (!cst_and_fits_in_hwi (step))
2150 break;
2152 st = int_cst_value (step);
2153 op1 = TREE_OPERAND (expr, 1);
2154 op1 = strip_offset_1 (op1, false, false, &off1);
2155 *offset = off1 * st;
2157 if (top_compref
2158 && integer_zerop (op1))
2160 /* Strip the component reference completely. */
2161 op0 = TREE_OPERAND (expr, 0);
2162 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2163 *offset += off0;
2164 return op0;
2166 break;
2168 case COMPONENT_REF:
2170 tree field;
2172 if (!inside_addr)
2173 return orig_expr;
2175 tmp = component_ref_field_offset (expr);
2176 field = TREE_OPERAND (expr, 1);
2177 if (top_compref
2178 && cst_and_fits_in_hwi (tmp)
2179 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2181 HOST_WIDE_INT boffset, abs_off;
2183 /* Strip the component reference completely. */
2184 op0 = TREE_OPERAND (expr, 0);
2185 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2186 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2187 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2188 if (boffset < 0)
2189 abs_off = -abs_off;
2191 *offset = off0 + int_cst_value (tmp) + abs_off;
2192 return op0;
2195 break;
2197 case ADDR_EXPR:
2198 op0 = TREE_OPERAND (expr, 0);
2199 op0 = strip_offset_1 (op0, true, true, &off0);
2200 *offset += off0;
2202 if (op0 == TREE_OPERAND (expr, 0))
2203 return orig_expr;
2205 expr = build_fold_addr_expr (op0);
2206 return fold_convert (orig_type, expr);
2208 case MEM_REF:
2209 /* ??? Offset operand? */
2210 inside_addr = false;
2211 break;
2213 default:
2214 return orig_expr;
2217 /* Default handling of expressions for which we want to recurse into
2218 the first operand. */
2219 op0 = TREE_OPERAND (expr, 0);
2220 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2221 *offset += off0;
2223 if (op0 == TREE_OPERAND (expr, 0)
2224 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2225 return orig_expr;
2227 expr = copy_node (expr);
2228 TREE_OPERAND (expr, 0) = op0;
2229 if (op1)
2230 TREE_OPERAND (expr, 1) = op1;
2232 /* Inside an address, we might strip the top-level component references,
2233 thus changing the type of the expression. The handling of ADDR_EXPR
2234 will fix that. */
2235 expr = fold_convert (orig_type, expr);
2237 return expr;
2240 /* Strips constant offsets from EXPR and stores them in *OFFSET. */
2242 static tree
2243 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2245 HOST_WIDE_INT off;
2246 tree core = strip_offset_1 (expr, false, false, &off);
2247 *offset = off;
2248 return core;
2251 /* Returns a variant of TYPE that can be used as a base for different uses.
2252 We return an unsigned type with the same precision, which avoids problems
2253 with overflows. */
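/* E.g. int maps to unsigned int, and a pointer type maps to an unsigned
   integer type of the same precision; arithmetic on the new ivs then wraps
   modulo 2^precision instead of invoking undefined overflow behavior.  */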
2255 static tree
2256 generic_type_for (tree type)
2258 if (POINTER_TYPE_P (type))
2259 return unsigned_type_for (type);
2261 if (TYPE_UNSIGNED (type))
2262 return type;
2264 return unsigned_type_for (type);
2267 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2268 the bitmap in which we should store them. */
2270 static struct ivopts_data *fd_ivopts_data;
2271 static tree
2272 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2274 bitmap *depends_on = (bitmap *) data;
2275 struct version_info *info;
2277 if (TREE_CODE (*expr_p) != SSA_NAME)
2278 return NULL_TREE;
2279 info = name_info (fd_ivopts_data, *expr_p);
2281 if (!info->inv_id || info->has_nonlin_use)
2282 return NULL_TREE;
2284 if (!*depends_on)
2285 *depends_on = BITMAP_ALLOC (NULL);
2286 bitmap_set_bit (*depends_on, info->inv_id);
2288 return NULL_TREE;
2291 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT
2292 and the position to POS. If USE is not NULL, the candidate is set as
2293 related to it. If both BASE and STEP are NULL, we add a pseudocandidate
2294 for the replacement of the final value of the iv by a direct computation. */
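/* Candidates are deduplicated here: if an equivalent candidate (same
   position, increment statement, base, step and precision) already exists,
   it is reused and only its importance and related-use bookkeeping are
   updated.  */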
2296 static struct iv_cand *
2297 add_candidate_1 (struct ivopts_data *data,
2298 tree base, tree step, bool important, enum iv_position pos,
2299 struct iv_use *use, gimple incremented_at)
2301 unsigned i;
2302 struct iv_cand *cand = NULL;
2303 tree type, orig_type;
2305 /* For non-original variables, make sure their values are computed in a type
2306 that does not invoke undefined behavior on overflows (since in general,
2307 we cannot prove that these induction variables are non-wrapping). */
2308 if (pos != IP_ORIGINAL)
2310 orig_type = TREE_TYPE (base);
2311 type = generic_type_for (orig_type);
2312 if (type != orig_type)
2314 base = fold_convert (type, base);
2315 step = fold_convert (type, step);
2319 for (i = 0; i < n_iv_cands (data); i++)
2321 cand = iv_cand (data, i);
2323 if (cand->pos != pos)
2324 continue;
2326 if (cand->incremented_at != incremented_at
2327 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2328 && cand->ainc_use != use))
2329 continue;
2331 if (!cand->iv)
2333 if (!base && !step)
2334 break;
2336 continue;
2339 if (!base && !step)
2340 continue;
2342 if (operand_equal_p (base, cand->iv->base, 0)
2343 && operand_equal_p (step, cand->iv->step, 0)
2344 && (TYPE_PRECISION (TREE_TYPE (base))
2345 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2346 break;
2349 if (i == n_iv_cands (data))
2351 cand = XCNEW (struct iv_cand);
2352 cand->id = i;
2354 if (!base && !step)
2355 cand->iv = NULL;
2356 else
2357 cand->iv = alloc_iv (base, step);
2359 cand->pos = pos;
2360 if (pos != IP_ORIGINAL && cand->iv)
2362 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2363 cand->var_after = cand->var_before;
2365 cand->important = important;
2366 cand->incremented_at = incremented_at;
2367 data->iv_candidates.safe_push (cand);
2369 if (step
2370 && TREE_CODE (step) != INTEGER_CST)
2372 fd_ivopts_data = data;
2373 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2376 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2377 cand->ainc_use = use;
2378 else
2379 cand->ainc_use = NULL;
2381 if (dump_file && (dump_flags & TDF_DETAILS))
2382 dump_cand (dump_file, cand);
2385 if (important && !cand->important)
2387 cand->important = true;
2388 if (dump_file && (dump_flags & TDF_DETAILS))
2389 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2392 if (use)
2394 bitmap_set_bit (use->related_cands, i);
2395 if (dump_file && (dump_flags & TDF_DETAILS))
2396 fprintf (dump_file, "Candidate %d is related to use %d\n",
2397 cand->id, use->id);
2400 return cand;
2403 /* Returns true if incrementing the induction variable at the end of the LOOP
2404 is allowed.
2406 The purpose is to avoid splitting the latch edge with a biv increment, thus
2407 creating a jump, possibly confusing other optimization passes and leaving
2408 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2409 is not available (so we do not have a better alternative), or if the latch
2410 edge is already nonempty. */
2412 static bool
2413 allow_ip_end_pos_p (struct loop *loop)
2415 if (!ip_normal_pos (loop))
2416 return true;
2418 if (!empty_block_p (ip_end_pos (loop)))
2419 return true;
2421 return false;
2424 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2425 The important field is set to IMPORTANT. */
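/* E.g. on a target with post-increment addressing, a memory access whose
   step equals the access size (a "*p++" pattern) can be done by a single
   auto-increment instruction, so candidates whose increment is placed
   directly before or after the use are worth adding.  */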
2427 static void
2428 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2429 bool important, struct iv_use *use)
2431 basic_block use_bb = gimple_bb (use->stmt);
2432 enum machine_mode mem_mode;
2433 unsigned HOST_WIDE_INT cstepi;
2435 /* If we insert the increment in any position other than the standard
2436 ones, we must ensure that it is incremented once per iteration.
2437 It must not be in an inner nested loop, or one side of an if
2438 statement. */
2439 if (use_bb->loop_father != data->current_loop
2440 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2441 || stmt_could_throw_p (use->stmt)
2442 || !cst_and_fits_in_hwi (step))
2443 return;
2445 cstepi = int_cst_value (step);
2447 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2448 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2449 || USE_STORE_PRE_INCREMENT (mem_mode))
2450 && GET_MODE_SIZE (mem_mode) == cstepi)
2451 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2452 || USE_STORE_PRE_DECREMENT (mem_mode))
2453 && GET_MODE_SIZE (mem_mode) == -cstepi))
2455 enum tree_code code = MINUS_EXPR;
2456 tree new_base;
2457 tree new_step = step;
2459 if (POINTER_TYPE_P (TREE_TYPE (base)))
2461 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2462 code = POINTER_PLUS_EXPR;
2464 else
2465 new_step = fold_convert (TREE_TYPE (base), new_step);
2466 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2467 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2468 use->stmt);
2470 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2471 || USE_STORE_POST_INCREMENT (mem_mode))
2472 && GET_MODE_SIZE (mem_mode) == cstepi)
2473 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2474 || USE_STORE_POST_DECREMENT (mem_mode))
2475 && GET_MODE_SIZE (mem_mode) == -cstepi))
2477 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2478 use->stmt);
2482 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT.
2483 If USE is not NULL, the candidate is set as related to it. The candidate
2484 computation is scheduled on all available positions. */
2486 static void
2487 add_candidate (struct ivopts_data *data,
2488 tree base, tree step, bool important, struct iv_use *use)
2490 if (ip_normal_pos (data->current_loop))
2491 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2492 if (ip_end_pos (data->current_loop)
2493 && allow_ip_end_pos_p (data->current_loop))
2494 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2496 if (use != NULL && use->type == USE_ADDRESS)
2497 add_autoinc_candidates (data, base, step, important, use);
2500 /* Adds standard iv candidates. */
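/* These are the canonical counters 0, 1, 2, ... in the int type, and also
   in the long and long long types as long as they are no wider than a
   machine word.  */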
2502 static void
2503 add_standard_iv_candidates (struct ivopts_data *data)
2505 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2507 /* The same for the long type if it is still fast enough. */
2508 if (TYPE_PRECISION
2509 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2510 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2511 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2512 build_int_cst (long_integer_type_node, 1), true, NULL);
2514 /* The same for a double-integer type if it is still fast enough. */
2515 if (TYPE_PRECISION
2516 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2517 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2518 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2519 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2523 /* Adds candidates based on the old induction variable IV. */
2525 static void
2526 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2528 gimple phi;
2529 tree def;
2530 struct iv_cand *cand;
2532 add_candidate (data, iv->base, iv->step, true, NULL);
2534 /* The same, but with initial value zero. */
2535 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2536 add_candidate (data, size_int (0), iv->step, true, NULL);
2537 else
2538 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2539 iv->step, true, NULL);
2541 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2542 if (gimple_code (phi) == GIMPLE_PHI)
2544 /* Additionally record the possibility of leaving the original iv
2545 untouched. */
2546 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2547 /* Don't add candidate if it's from another PHI node because
2548 it's an affine iv appearing in the form of PEELED_CHREC. */
2549 phi = SSA_NAME_DEF_STMT (def);
2550 if (gimple_code (phi) != GIMPLE_PHI)
2552 cand = add_candidate_1 (data,
2553 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2554 SSA_NAME_DEF_STMT (def));
2555 cand->var_before = iv->ssa_name;
2556 cand->var_after = def;
2558 else
2559 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2563 /* Adds candidates based on the old induction variables. */
2565 static void
2566 add_old_ivs_candidates (struct ivopts_data *data)
2568 unsigned i;
2569 struct iv *iv;
2570 bitmap_iterator bi;
2572 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2574 iv = ver_info (data, i)->iv;
2575 if (iv && iv->biv_p && !integer_zerop (iv->step))
2576 add_old_iv_candidates (data, iv);
2580 /* Adds candidates based on the value of the induction variable IV and USE. */
2582 static void
2583 add_iv_value_candidates (struct ivopts_data *data,
2584 struct iv *iv, struct iv_use *use)
2586 unsigned HOST_WIDE_INT offset;
2587 tree base;
2588 tree basetype;
2590 add_candidate (data, iv->base, iv->step, false, use);
2592 /* The same, but with initial value zero. Make such variable important,
2593 since it is generic enough so that possibly many uses may be based
2594 on it. */
2595 basetype = TREE_TYPE (iv->base);
2596 if (POINTER_TYPE_P (basetype))
2597 basetype = sizetype;
2598 add_candidate (data, build_int_cst (basetype, 0),
2599 iv->step, true, use);
2601 /* Third, try removing the constant offset. Make sure to even
2602 add a candidate for &a[0] vs. (T *)&a. */
2603 base = strip_offset (iv->base, &offset);
2604 if (offset
2605 || base != iv->base)
2606 add_candidate (data, base, iv->step, false, use);
2609 /* Adds candidates based on the uses. */
2611 static void
2612 add_derived_ivs_candidates (struct ivopts_data *data)
2614 unsigned i;
2616 for (i = 0; i < n_iv_uses (data); i++)
2618 struct iv_use *use = iv_use (data, i);
2620 if (!use)
2621 continue;
2623 switch (use->type)
2625 case USE_NONLINEAR_EXPR:
2626 case USE_COMPARE:
2627 case USE_ADDRESS:
2628 /* Just add the ivs based on the value of the iv used here. */
2629 add_iv_value_candidates (data, use->iv, use);
2630 break;
2632 default:
2633 gcc_unreachable ();
2638 /* Record important candidates and add them to related_cands bitmaps
2639 if needed. */
2641 static void
2642 record_important_candidates (struct ivopts_data *data)
2644 unsigned i;
2645 struct iv_use *use;
2647 for (i = 0; i < n_iv_cands (data); i++)
2649 struct iv_cand *cand = iv_cand (data, i);
2651 if (cand->important)
2652 bitmap_set_bit (data->important_candidates, i);
2655 data->consider_all_candidates = (n_iv_cands (data)
2656 <= CONSIDER_ALL_CANDIDATES_BOUND);
2658 if (data->consider_all_candidates)
2660 /* We will not need "related_cands" bitmaps in this case,
2661 so release them to decrease peak memory consumption. */
2662 for (i = 0; i < n_iv_uses (data); i++)
2664 use = iv_use (data, i);
2665 BITMAP_FREE (use->related_cands);
2668 else
2670 /* Add important candidates to the related_cands bitmaps. */
2671 for (i = 0; i < n_iv_uses (data); i++)
2672 bitmap_ior_into (iv_use (data, i)->related_cands,
2673 data->important_candidates);
2677 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2678 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2679 we allocate a small hash table for each use. */
2681 static void
2682 alloc_use_cost_map (struct ivopts_data *data)
2684 unsigned i, size, s;
2686 for (i = 0; i < n_iv_uses (data); i++)
2688 struct iv_use *use = iv_use (data, i);
2690 if (data->consider_all_candidates)
2691 size = n_iv_cands (data);
2692 else
2694 s = bitmap_count_bits (use->related_cands);
2696 /* Round up to the nearest power of two, so that computing modulo it is fast. */
2697 size = s ? (1 << ceil_log2 (s)) : 1;
2700 use->n_map_members = size;
2701 use->cost_map = XCNEWVEC (struct cost_pair, size);
2705 /* Returns description of computation cost of expression whose runtime
2706 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2708 static comp_cost
2709 new_cost (unsigned runtime, unsigned complexity)
2711 comp_cost cost;
2713 cost.cost = runtime;
2714 cost.complexity = complexity;
2716 return cost;
2719 /* Adds costs COST1 and COST2. */
2721 static comp_cost
2722 add_costs (comp_cost cost1, comp_cost cost2)
2724 cost1.cost += cost2.cost;
2725 cost1.complexity += cost2.complexity;
2727 return cost1;
2729 /* Subtracts cost COST2 from COST1. */
2731 static comp_cost
2732 sub_costs (comp_cost cost1, comp_cost cost2)
2734 cost1.cost -= cost2.cost;
2735 cost1.complexity -= cost2.complexity;
2737 return cost1;
2740 /* Returns a negative number if COST1 < COST2, a positive number if
2741 COST1 > COST2, and 0 if COST1 = COST2. */
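/* The runtime cost dominates; complexity is only used to break ties.  */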
2743 static int
2744 compare_costs (comp_cost cost1, comp_cost cost2)
2746 if (cost1.cost == cost2.cost)
2747 return cost1.complexity - cost2.complexity;
2749 return cost1.cost - cost2.cost;
2752 /* Returns true if COST is infinite. */
2754 static bool
2755 infinite_cost_p (comp_cost cost)
2757 return cost.cost == INFTY;
2760 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2761 on invariants DEPENDS_ON and that the value used in expressing it
2762 is VALUE, and in case of iv elimination the comparison operator is COMP. */
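/* If not all candidates are considered, the cost map acts as a small
   open-addressed hash table: the search for CAND starts at slot
   cand->id mod n_map_members and probes linearly, wrapping around once.
   Infinite costs are never recorded, so an empty slot terminates the
   lookup in get_use_iv_cost.  */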
2764 static void
2765 set_use_iv_cost (struct ivopts_data *data,
2766 struct iv_use *use, struct iv_cand *cand,
2767 comp_cost cost, bitmap depends_on, tree value,
2768 enum tree_code comp, int inv_expr_id)
2770 unsigned i, s;
2772 if (infinite_cost_p (cost))
2774 BITMAP_FREE (depends_on);
2775 return;
2778 if (data->consider_all_candidates)
2780 use->cost_map[cand->id].cand = cand;
2781 use->cost_map[cand->id].cost = cost;
2782 use->cost_map[cand->id].depends_on = depends_on;
2783 use->cost_map[cand->id].value = value;
2784 use->cost_map[cand->id].comp = comp;
2785 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2786 return;
2789 /* n_map_members is a power of two, so this computes modulo. */
2790 s = cand->id & (use->n_map_members - 1);
2791 for (i = s; i < use->n_map_members; i++)
2792 if (!use->cost_map[i].cand)
2793 goto found;
2794 for (i = 0; i < s; i++)
2795 if (!use->cost_map[i].cand)
2796 goto found;
2798 gcc_unreachable ();
2800 found:
2801 use->cost_map[i].cand = cand;
2802 use->cost_map[i].cost = cost;
2803 use->cost_map[i].depends_on = depends_on;
2804 use->cost_map[i].value = value;
2805 use->cost_map[i].comp = comp;
2806 use->cost_map[i].inv_expr_id = inv_expr_id;
2809 /* Gets cost of (USE, CANDIDATE) pair. */
2811 static struct cost_pair *
2812 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2813 struct iv_cand *cand)
2815 unsigned i, s;
2816 struct cost_pair *ret;
2818 if (!cand)
2819 return NULL;
2821 if (data->consider_all_candidates)
2823 ret = use->cost_map + cand->id;
2824 if (!ret->cand)
2825 return NULL;
2827 return ret;
2830 /* n_map_members is a power of two, so this computes modulo. */
2831 s = cand->id & (use->n_map_members - 1);
2832 for (i = s; i < use->n_map_members; i++)
2833 if (use->cost_map[i].cand == cand)
2834 return use->cost_map + i;
2835 else if (use->cost_map[i].cand == NULL)
2836 return NULL;
2837 for (i = 0; i < s; i++)
2838 if (use->cost_map[i].cand == cand)
2839 return use->cost_map + i;
2840 else if (use->cost_map[i].cand == NULL)
2841 return NULL;
2843 return NULL;
2846 /* Returns an estimate of the cost of computing SEQ. */
2848 static unsigned
2849 seq_cost (rtx_insn *seq, bool speed)
2851 unsigned cost = 0;
2852 rtx set;
2854 for (; seq; seq = NEXT_INSN (seq))
2856 set = single_set (seq);
2857 if (set)
2858 cost += set_src_cost (SET_SRC (set), speed);
2859 else
2860 cost++;
2863 return cost;
2866 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory. */
2867 static rtx
2868 produce_memory_decl_rtl (tree obj, int *regno)
2870 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2871 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2872 rtx x;
2874 gcc_assert (obj);
2875 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2877 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2878 x = gen_rtx_SYMBOL_REF (address_mode, name);
2879 SET_SYMBOL_REF_DECL (x, obj);
2880 x = gen_rtx_MEM (DECL_MODE (obj), x);
2881 set_mem_addr_space (x, as);
2882 targetm.encode_section_info (obj, x, true);
2884 else
2886 x = gen_raw_REG (address_mode, (*regno)++);
2887 x = gen_rtx_MEM (DECL_MODE (obj), x);
2888 set_mem_addr_space (x, as);
2891 return x;
2894 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
2895 walk_tree. DATA contains the actual fake register number. */
2897 static tree
2898 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2900 tree obj = NULL_TREE;
2901 rtx x = NULL_RTX;
2902 int *regno = (int *) data;
2904 switch (TREE_CODE (*expr_p))
2906 case ADDR_EXPR:
2907 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2908 handled_component_p (*expr_p);
2909 expr_p = &TREE_OPERAND (*expr_p, 0))
2910 continue;
2911 obj = *expr_p;
2912 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2913 x = produce_memory_decl_rtl (obj, regno);
2914 break;
2916 case SSA_NAME:
2917 *ws = 0;
2918 obj = SSA_NAME_VAR (*expr_p);
2919 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2920 if (!obj)
2921 return NULL_TREE;
2922 if (!DECL_RTL_SET_P (obj))
2923 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2924 break;
2926 case VAR_DECL:
2927 case PARM_DECL:
2928 case RESULT_DECL:
2929 *ws = 0;
2930 obj = *expr_p;
2932 if (DECL_RTL_SET_P (obj))
2933 break;
2935 if (DECL_MODE (obj) == BLKmode)
2936 x = produce_memory_decl_rtl (obj, regno);
2937 else
2938 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2940 break;
2942 default:
2943 break;
2946 if (x)
2948 decl_rtl_to_reset.safe_push (obj);
2949 SET_DECL_RTL (obj, x);
2952 return NULL_TREE;
2955 /* Determines cost of the computation of EXPR. */
2957 static unsigned
2958 computation_cost (tree expr, bool speed)
2960 rtx_insn *seq;
2961 rtx rslt;
2962 tree type = TREE_TYPE (expr);
2963 unsigned cost;
2964 /* Avoid using hard regs in ways which may be unsupported. */
2965 int regno = LAST_VIRTUAL_REGISTER + 1;
2966 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2967 enum node_frequency real_frequency = node->frequency;
2969 node->frequency = NODE_FREQUENCY_NORMAL;
2970 crtl->maybe_hot_insn_p = speed;
2971 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2972 start_sequence ();
2973 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2974 seq = get_insns ();
2975 end_sequence ();
2976 default_rtl_profile ();
2977 node->frequency = real_frequency;
2979 cost = seq_cost (seq, speed);
2980 if (MEM_P (rslt))
2981 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2982 TYPE_ADDR_SPACE (type), speed);
2983 else if (!REG_P (rslt))
2984 cost += set_src_cost (rslt, speed);
2986 return cost;
2989 /* Returns the variable containing the value of candidate CAND at statement AT. */
2991 static tree
2992 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2994 if (stmt_after_increment (loop, cand, stmt))
2995 return cand->var_after;
2996 else
2997 return cand->var_before;
3000 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3001 same precision that is at least as wide as the precision of TYPE, stores
3002 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3003 type of A and B. */
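/* E.g. if *A is (int) la and *B is (int) lb with LA and LB of type long,
   LA and LB are stored back to *A and *B and the long type is returned,
   so that the subtraction can be folded in the wider type.  */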
3005 static tree
3006 determine_common_wider_type (tree *a, tree *b)
3008 tree wider_type = NULL;
3009 tree suba, subb;
3010 tree atype = TREE_TYPE (*a);
3012 if (CONVERT_EXPR_P (*a))
3014 suba = TREE_OPERAND (*a, 0);
3015 wider_type = TREE_TYPE (suba);
3016 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3017 return atype;
3019 else
3020 return atype;
3022 if (CONVERT_EXPR_P (*b))
3024 subb = TREE_OPERAND (*b, 0);
3025 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3026 return atype;
3028 else
3029 return atype;
3031 *a = suba;
3032 *b = subb;
3033 return wider_type;
3036 /* Determines the expression by which USE is expressed from induction variable
3037 CAND at statement AT in LOOP. The expression is stored in a decomposed
3038 form into AFF. Returns false if USE cannot be expressed using CAND. */
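/* The decomposition computed below is

     use = ubase - ratio * cbase + ratio * var,

   where ratio = ustep / cstep must be a compile-time constant; e.g. a
   use with step 4 expressed from a candidate with step 1 has ratio 4.  */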
3040 static bool
3041 get_computation_aff (struct loop *loop,
3042 struct iv_use *use, struct iv_cand *cand, gimple at,
3043 struct aff_tree *aff)
3045 tree ubase = use->iv->base;
3046 tree ustep = use->iv->step;
3047 tree cbase = cand->iv->base;
3048 tree cstep = cand->iv->step, cstep_common;
3049 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3050 tree common_type, var;
3051 tree uutype;
3052 aff_tree cbase_aff, var_aff;
3053 widest_int rat;
3055 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3057 /* We do not have a precision to express the values of the use. */
3058 return false;
3061 var = var_at_stmt (loop, cand, at);
3062 uutype = unsigned_type_for (utype);
3064 /* If the conversion is not noop, perform it. */
3065 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3067 cstep = fold_convert (uutype, cstep);
3068 cbase = fold_convert (uutype, cbase);
3069 var = fold_convert (uutype, var);
3072 if (!constant_multiple_of (ustep, cstep, &rat))
3073 return false;
3075 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3076 type, we achieve better folding by computing their difference in this
3077 wider type, and cast the result to UUTYPE. We do not need to worry about
3078 overflows, as all the arithmetic will in the end be performed in UUTYPE
3079 anyway. */
3080 common_type = determine_common_wider_type (&ubase, &cbase);
3082 /* use = ubase - ratio * cbase + ratio * var. */
3083 tree_to_aff_combination (ubase, common_type, aff);
3084 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3085 tree_to_aff_combination (var, uutype, &var_aff);
3087 /* We need to shift the value if we are after the increment. */
3088 if (stmt_after_increment (loop, cand, at))
3090 aff_tree cstep_aff;
3092 if (common_type != uutype)
3093 cstep_common = fold_convert (common_type, cstep);
3094 else
3095 cstep_common = cstep;
3097 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3098 aff_combination_add (&cbase_aff, &cstep_aff);
3101 aff_combination_scale (&cbase_aff, -rat);
3102 aff_combination_add (aff, &cbase_aff);
3103 if (common_type != uutype)
3104 aff_combination_convert (aff, uutype);
3106 aff_combination_scale (&var_aff, rat);
3107 aff_combination_add (aff, &var_aff);
3109 return true;
3112 /* Return the type of USE. */
3114 static tree
3115 get_use_type (struct iv_use *use)
3117 tree base_type = TREE_TYPE (use->iv->base);
3118 tree type;
3120 if (use->type == USE_ADDRESS)
3122 /* The base_type may be a void pointer. Create a pointer type based on
3123 the mem_ref instead. */
3124 type = build_pointer_type (TREE_TYPE (*use->op_p));
3125 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3126 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3128 else
3129 type = base_type;
3131 return type;
3134 /* Determines the expression by which USE is expressed from induction variable
3135 CAND at statement AT in LOOP. The computation is unshared. */
3137 static tree
3138 get_computation_at (struct loop *loop,
3139 struct iv_use *use, struct iv_cand *cand, gimple at)
3141 aff_tree aff;
3142 tree type = get_use_type (use);
3144 if (!get_computation_aff (loop, use, cand, at, &aff))
3145 return NULL_TREE;
3146 unshare_aff_combination (&aff);
3147 return fold_convert (type, aff_combination_to_tree (&aff));
3150 /* Determines the expression by which USE is expressed from induction variable
3151 CAND in LOOP. The computation is unshared. */
3153 static tree
3154 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3156 return get_computation_at (loop, use, cand, use->stmt);
3159 /* Adjust the cost COST for being in loop setup rather than loop body.
3160 If we're optimizing for space, the loop setup overhead is constant;
3161 if we're optimizing for speed, amortize it over the per-iteration cost. */
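/* E.g. with an average of 10 iterations, a setup cost of 20 counts as 2
   per iteration when optimizing for speed, but stays 20 when optimizing
   for size.  */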
3162 static unsigned
3163 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3165 if (cost == INFTY)
3166 return cost;
3167 else if (optimize_loop_for_speed_p (data->current_loop))
3168 return cost / avg_loop_niter (data->current_loop);
3169 else
3170 return cost;
3173 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3174 validity for a memory reference accessing memory of mode MODE in
3175 address space AS. */
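/* E.g. x86 addressing modes allow index scale factors of 1, 2, 4 and 8,
   so only those ratios are valid there.  The answer is computed once per
   (mode, address space) pair by probing the target and is cached as a
   bitmap over the ratios in [-MAX_RATIO, MAX_RATIO].  */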
3178 bool
3179 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3180 addr_space_t as)
3182 #define MAX_RATIO 128
3183 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3184 static vec<sbitmap> valid_mult_list;
3185 sbitmap valid_mult;
3187 if (data_index >= valid_mult_list.length ())
3188 valid_mult_list.safe_grow_cleared (data_index + 1);
3190 valid_mult = valid_mult_list[data_index];
3191 if (!valid_mult)
3193 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3194 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3195 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3196 rtx addr, scaled;
3197 HOST_WIDE_INT i;
3199 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3200 bitmap_clear (valid_mult);
3201 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3202 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3203 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3205 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3206 if (memory_address_addr_space_p (mode, addr, as)
3207 || memory_address_addr_space_p (mode, scaled, as))
3208 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3211 if (dump_file && (dump_flags & TDF_DETAILS))
3213 fprintf (dump_file, " allowed multipliers:");
3214 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3215 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3216 fprintf (dump_file, " %d", (int) i);
3217 fprintf (dump_file, "\n");
3218 fprintf (dump_file, "\n");
3221 valid_mult_list[data_index] = valid_mult;
3224 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3225 return false;
3227 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3230 /* Returns the cost of an address of the shape symbol + var + OFFSET + RATIO * index.
3231 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3232 variable is omitted. Compute the cost for a memory reference that accesses
3233 a memory location of mode MEM_MODE in address space AS.
3235 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3236 size of MEM_MODE / RATIO) is available. To make this determination, we
3237 look at the size of the increment to be made, which is given in CSTEP.
3238 CSTEP may be zero if the step is unknown.
3239 STMT_AFTER_INC is true iff the statement we're looking at is after the
3240 increment of the original biv.
3242 TODO -- there must be some better way. This all is quite crude. */
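/* The costs below are precomputed once per (mode, address space) pair:
   all 16 combinations of a symbol, a variable, an offset and a scaled
   index being present in the address are expanded and priced via the
   target's address cost hook, as are the auto-increment forms.  */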
3244 enum ainc_type
3246 AINC_PRE_INC, /* Pre increment. */
3247 AINC_PRE_DEC, /* Pre decrement. */
3248 AINC_POST_INC, /* Post increment. */
3249 AINC_POST_DEC, /* Post decrement. */
3250 AINC_NONE /* Also the number of auto increment types. */
3253 typedef struct address_cost_data_s
3255 HOST_WIDE_INT min_offset, max_offset;
3256 unsigned costs[2][2][2][2];
3257 unsigned ainc_costs[AINC_NONE];
3258 } *address_cost_data;
3261 static comp_cost
3262 get_address_cost (bool symbol_present, bool var_present,
3263 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3264 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3265 addr_space_t as, bool speed,
3266 bool stmt_after_inc, bool *may_autoinc)
3268 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3269 static vec<address_cost_data> address_cost_data_list;
3270 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3271 address_cost_data data;
3272 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3273 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3274 unsigned cost, acost, complexity;
3275 enum ainc_type autoinc_type;
3276 bool offset_p, ratio_p, autoinc;
3277 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3278 unsigned HOST_WIDE_INT mask;
3279 unsigned bits;
3281 if (data_index >= address_cost_data_list.length ())
3282 address_cost_data_list.safe_grow_cleared (data_index + 1);
3284 data = address_cost_data_list[data_index];
3285 if (!data)
3287 HOST_WIDE_INT i;
3288 HOST_WIDE_INT rat, off = 0;
3289 int old_cse_not_expected, width;
3290 unsigned sym_p, var_p, off_p, rat_p, add_c;
3291 rtx_insn *seq;
3292 rtx addr, base;
3293 rtx reg0, reg1;
3295 data = (address_cost_data) xcalloc (1, sizeof (*data));
3297 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3299 width = GET_MODE_BITSIZE (address_mode) - 1;
3300 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3301 width = HOST_BITS_PER_WIDE_INT - 1;
3302 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3304 for (i = width; i >= 0; i--)
3306 off = -((unsigned HOST_WIDE_INT) 1 << i);
3307 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3308 if (memory_address_addr_space_p (mem_mode, addr, as))
3309 break;
3311 data->min_offset = (i == -1? 0 : off);
3313 for (i = width; i >= 0; i--)
3315 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3316 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3317 if (memory_address_addr_space_p (mem_mode, addr, as))
3318 break;
3319 /* For some TARGETs, like ARM THUMB1, the offset should be naturally
3320 aligned. Try an aligned offset if address_mode is not QImode. */
3321 off = (address_mode == QImode)
3322 ? 0
3323 : ((unsigned HOST_WIDE_INT) 1 << i)
3324 - GET_MODE_SIZE (address_mode);
3325 if (off > 0)
3327 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3328 if (memory_address_addr_space_p (mem_mode, addr, as))
3329 break;
3332 if (i == -1)
3333 off = 0;
3334 data->max_offset = off;
3336 if (dump_file && (dump_flags & TDF_DETAILS))
3338 fprintf (dump_file, "get_address_cost:\n");
3339 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3340 GET_MODE_NAME (mem_mode),
3341 data->min_offset);
3342 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3343 GET_MODE_NAME (mem_mode),
3344 data->max_offset);
3347 rat = 1;
3348 for (i = 2; i <= MAX_RATIO; i++)
3349 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3351 rat = i;
3352 break;
3355 /* Compute the cost of various addressing modes. */
3356 acost = 0;
3357 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3358 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3360 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3361 || USE_STORE_PRE_DECREMENT (mem_mode))
3363 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3364 has_predec[mem_mode]
3365 = memory_address_addr_space_p (mem_mode, addr, as);
3367 if (has_predec[mem_mode])
3368 data->ainc_costs[AINC_PRE_DEC]
3369 = address_cost (addr, mem_mode, as, speed);
3371 if (USE_LOAD_POST_DECREMENT (mem_mode)
3372 || USE_STORE_POST_DECREMENT (mem_mode))
3374 addr = gen_rtx_POST_DEC (address_mode, reg0);
3375 has_postdec[mem_mode]
3376 = memory_address_addr_space_p (mem_mode, addr, as);
3378 if (has_postdec[mem_mode])
3379 data->ainc_costs[AINC_POST_DEC]
3380 = address_cost (addr, mem_mode, as, speed);
3382 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3383 || USE_STORE_PRE_INCREMENT (mem_mode))
3385 addr = gen_rtx_PRE_INC (address_mode, reg0);
3386 has_preinc[mem_mode]
3387 = memory_address_addr_space_p (mem_mode, addr, as);
3389 if (has_preinc[mem_mode])
3390 data->ainc_costs[AINC_PRE_INC]
3391 = address_cost (addr, mem_mode, as, speed);
3393 if (USE_LOAD_POST_INCREMENT (mem_mode)
3394 || USE_STORE_POST_INCREMENT (mem_mode))
3396 addr = gen_rtx_POST_INC (address_mode, reg0);
3397 has_postinc[mem_mode]
3398 = memory_address_addr_space_p (mem_mode, addr, as);
3400 if (has_postinc[mem_mode])
3401 data->ainc_costs[AINC_POST_INC]
3402 = address_cost (addr, mem_mode, as, speed);
3404 for (i = 0; i < 16; i++)
3406 sym_p = i & 1;
3407 var_p = (i >> 1) & 1;
3408 off_p = (i >> 2) & 1;
3409 rat_p = (i >> 3) & 1;
3411 addr = reg0;
3412 if (rat_p)
3413 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3414 gen_int_mode (rat, address_mode));
3416 if (var_p)
3417 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3419 if (sym_p)
3421 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3422 /* ??? We can run into trouble with some backends by presenting
3423 it with symbols which haven't been properly passed through
3424 targetm.encode_section_info. By setting the local bit, we
3425 enhance the probability of things working. */
3426 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3428 if (off_p)
3429 base = gen_rtx_fmt_e (CONST, address_mode,
3430 gen_rtx_fmt_ee
3431 (PLUS, address_mode, base,
3432 gen_int_mode (off, address_mode)));
3434 else if (off_p)
3435 base = gen_int_mode (off, address_mode);
3436 else
3437 base = NULL_RTX;
3439 if (base)
3440 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3442 start_sequence ();
3443 /* To avoid splitting addressing modes, pretend that no cse will
3444 follow. */
3445 old_cse_not_expected = cse_not_expected;
3446 cse_not_expected = true;
3447 addr = memory_address_addr_space (mem_mode, addr, as);
3448 cse_not_expected = old_cse_not_expected;
3449 seq = get_insns ();
3450 end_sequence ();
3452 acost = seq_cost (seq, speed);
3453 acost += address_cost (addr, mem_mode, as, speed);
3455 if (!acost)
3456 acost = 1;
3457 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3460 /* On some targets, it is quite expensive to load a symbol into a register,
3461 which makes addresses that contain symbols look much more expensive.
3462 However, the symbol will have to be loaded in any case before the
3463 loop (and quite likely we have it in register already), so it does not
3464 make much sense to penalize them too heavily. So make some final
3465 tweaks for the SYMBOL_PRESENT modes:
3467 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3468 var is cheaper, use this mode with small penalty.
3469 If VAR_PRESENT is true, try whether the mode with
3470 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3471 if this is the case, use it. */
3472 add_c = add_cost (speed, address_mode);
3473 for (i = 0; i < 8; i++)
3475 var_p = i & 1;
3476 off_p = (i >> 1) & 1;
3477 rat_p = (i >> 2) & 1;
3479 acost = data->costs[0][1][off_p][rat_p] + 1;
3480 if (var_p)
3481 acost += add_c;
3483 if (acost < data->costs[1][var_p][off_p][rat_p])
3484 data->costs[1][var_p][off_p][rat_p] = acost;
3487 if (dump_file && (dump_flags & TDF_DETAILS))
3489 fprintf (dump_file, "Address costs:\n");
3491 for (i = 0; i < 16; i++)
3493 sym_p = i & 1;
3494 var_p = (i >> 1) & 1;
3495 off_p = (i >> 2) & 1;
3496 rat_p = (i >> 3) & 1;
3498 fprintf (dump_file, " ");
3499 if (sym_p)
3500 fprintf (dump_file, "sym + ");
3501 if (var_p)
3502 fprintf (dump_file, "var + ");
3503 if (off_p)
3504 fprintf (dump_file, "cst + ");
3505 if (rat_p)
3506 fprintf (dump_file, "rat * ");
3508 acost = data->costs[sym_p][var_p][off_p][rat_p];
3509 fprintf (dump_file, "index costs %d\n", acost);
3511 if (has_predec[mem_mode] || has_postdec[mem_mode]
3512 || has_preinc[mem_mode] || has_postinc[mem_mode])
3513 fprintf (dump_file, " May include autoinc/dec\n");
3514 fprintf (dump_file, "\n");
3517 address_cost_data_list[data_index] = data;
3520 bits = GET_MODE_BITSIZE (address_mode);
3521 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3522 offset &= mask;
3523 if ((offset >> (bits - 1) & 1))
3524 offset |= ~mask;
3525 s_offset = offset;
3527 autoinc = false;
3528 autoinc_type = AINC_NONE;
3529 msize = GET_MODE_SIZE (mem_mode);
3530 autoinc_offset = offset;
3531 if (stmt_after_inc)
3532 autoinc_offset += ratio * cstep;
3533 if (symbol_present || var_present || ratio != 1)
3534 autoinc = false;
3535 else
3537 if (has_postinc[mem_mode] && autoinc_offset == 0
3538 && msize == cstep)
3539 autoinc_type = AINC_POST_INC;
3540 else if (has_postdec[mem_mode] && autoinc_offset == 0
3541 && msize == -cstep)
3542 autoinc_type = AINC_POST_DEC;
3543 else if (has_preinc[mem_mode] && autoinc_offset == msize
3544 && msize == cstep)
3545 autoinc_type = AINC_PRE_INC;
3546 else if (has_predec[mem_mode] && autoinc_offset == -msize
3547 && msize == -cstep)
3548 autoinc_type = AINC_PRE_DEC;
3550 if (autoinc_type != AINC_NONE)
3551 autoinc = true;
3554 cost = 0;
3555 offset_p = (s_offset != 0
3556 && data->min_offset <= s_offset
3557 && s_offset <= data->max_offset);
3558 ratio_p = (ratio != 1
3559 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3561 if (ratio != 1 && !ratio_p)
3562 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3564 if (s_offset && !offset_p && !symbol_present)
3565 cost += add_cost (speed, address_mode);
3567 if (may_autoinc)
3568 *may_autoinc = autoinc;
3569 if (autoinc)
3570 acost = data->ainc_costs[autoinc_type];
3571 else
3572 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3573 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3574 return new_cost (cost + acost, complexity);
3577 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3578 the EXPR operand holding the multiplication. COST0 and COST1 are the costs for
3579 calculating the operands of EXPR. Returns true if successful, and returns
3580 the cost in COST. */
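/* E.g. for EXPR == a + b * 4, MULT is the b * 4 operand and m == 2, so the
   cost is that of a shift-add, (b << 2) + a, instead of a multiplication
   followed by an addition.  */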
3582 static bool
3583 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3584 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3586 comp_cost res;
3587 tree op1 = TREE_OPERAND (expr, 1);
3588 tree cst = TREE_OPERAND (mult, 1);
3589 tree multop = TREE_OPERAND (mult, 0);
3590 int m = exact_log2 (int_cst_value (cst));
3591 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3592 int sa_cost;
3593 bool equal_p = false;
3595 if (!(m >= 0 && m < maxm))
3596 return false;
3598 if (operand_equal_p (op1, mult, 0))
3599 equal_p = true;
3601 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3602 ? shiftadd_cost (speed, mode, m)
3603 : (equal_p
3604 ? shiftsub1_cost (speed, mode, m)
3605 : shiftsub0_cost (speed, mode, m)));
3606 res = new_cost (sa_cost, 0);
3607 res = add_costs (res, equal_p ? cost0 : cost1);
3609 STRIP_NOPS (multop);
3610 if (!is_gimple_val (multop))
3611 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3613 *cost = res;
3614 return true;
3617 /* Estimates cost of forcing expression EXPR into a variable. */
3619 static comp_cost
3620 force_expr_to_var_cost (tree expr, bool speed)
3622 static bool costs_initialized = false;
3623 static unsigned integer_cost [2];
3624 static unsigned symbol_cost [2];
3625 static unsigned address_cost [2];
3626 tree op0, op1;
3627 comp_cost cost0, cost1, cost;
3628 enum machine_mode mode;
3630 if (!costs_initialized)
3632 tree type = build_pointer_type (integer_type_node);
3633 tree var, addr;
3634 rtx x;
3635 int i;
3637 var = create_tmp_var_raw (integer_type_node, "test_var");
3638 TREE_STATIC (var) = 1;
3639 x = produce_memory_decl_rtl (var, NULL);
3640 SET_DECL_RTL (var, x);
3642 addr = build1 (ADDR_EXPR, type, var);
3645 for (i = 0; i < 2; i++)
3647 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3648 2000), i);
3650 symbol_cost[i] = computation_cost (addr, i) + 1;
3652 address_cost[i]
3653 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3654 if (dump_file && (dump_flags & TDF_DETAILS))
3656 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3657 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3658 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3659 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3660 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3661 fprintf (dump_file, "\n");
3665 costs_initialized = true;
3668 STRIP_NOPS (expr);
3670 if (SSA_VAR_P (expr))
3671 return no_cost;
3673 if (is_gimple_min_invariant (expr))
3675 if (TREE_CODE (expr) == INTEGER_CST)
3676 return new_cost (integer_cost [speed], 0);
3678 if (TREE_CODE (expr) == ADDR_EXPR)
3680 tree obj = TREE_OPERAND (expr, 0);
3682 if (TREE_CODE (obj) == VAR_DECL
3683 || TREE_CODE (obj) == PARM_DECL
3684 || TREE_CODE (obj) == RESULT_DECL)
3685 return new_cost (symbol_cost [speed], 0);
3688 return new_cost (address_cost [speed], 0);
3691 switch (TREE_CODE (expr))
3693 case POINTER_PLUS_EXPR:
3694 case PLUS_EXPR:
3695 case MINUS_EXPR:
3696 case MULT_EXPR:
3697 op0 = TREE_OPERAND (expr, 0);
3698 op1 = TREE_OPERAND (expr, 1);
3699 STRIP_NOPS (op0);
3700 STRIP_NOPS (op1);
3701 break;
3703 CASE_CONVERT:
3704 case NEGATE_EXPR:
3705 op0 = TREE_OPERAND (expr, 0);
3706 STRIP_NOPS (op0);
3707 op1 = NULL_TREE;
3708 break;
3710 default:
3711 /* Just an arbitrary value, FIXME. */
3712 return new_cost (target_spill_cost[speed], 0);
3715 if (op0 == NULL_TREE
3716 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3717 cost0 = no_cost;
3718 else
3719 cost0 = force_expr_to_var_cost (op0, speed);
3721 if (op1 == NULL_TREE
3722 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3723 cost1 = no_cost;
3724 else
3725 cost1 = force_expr_to_var_cost (op1, speed);
3727 mode = TYPE_MODE (TREE_TYPE (expr));
3728 switch (TREE_CODE (expr))
3730 case POINTER_PLUS_EXPR:
3731 case PLUS_EXPR:
3732 case MINUS_EXPR:
3733 case NEGATE_EXPR:
3734 cost = new_cost (add_cost (speed, mode), 0);
3735 if (TREE_CODE (expr) != NEGATE_EXPR)
3737 tree mult = NULL_TREE;
3738 comp_cost sa_cost;
3739 if (TREE_CODE (op1) == MULT_EXPR)
3740 mult = op1;
3741 else if (TREE_CODE (op0) == MULT_EXPR)
3742 mult = op0;
3744 if (mult != NULL_TREE
3745 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3746 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3747 speed, &sa_cost))
3748 return sa_cost;
3750 break;
3752 CASE_CONVERT:
3754 tree inner_mode, outer_mode;
3755 outer_mode = TREE_TYPE (expr);
3756 inner_mode = TREE_TYPE (op0);
3757 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3758 TYPE_MODE (inner_mode), speed), 0);
3760 break;
3762 case MULT_EXPR:
3763 if (cst_and_fits_in_hwi (op0))
3764 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3765 mode, speed), 0);
3766 else if (cst_and_fits_in_hwi (op1))
3767 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3768 mode, speed), 0);
3769 else
3770 return new_cost (target_spill_cost [speed], 0);
3771 break;
3773 default:
3774 gcc_unreachable ();
3777 cost = add_costs (cost, cost0);
3778 cost = add_costs (cost, cost1);
3780 /* Bound the cost by target_spill_cost. The parts of complicated
3781 computations often are either loop invariant or at least can
3782 be shared between several iv uses, so letting this grow without
3783 limits would not give reasonable results. */
3784 if (cost.cost > (int) target_spill_cost [speed])
3785 cost.cost = target_spill_cost [speed];
3787 return cost;
3790 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3791 invariants the computation depends on. */
3793 static comp_cost
3794 force_var_cost (struct ivopts_data *data,
3795 tree expr, bitmap *depends_on)
3797 if (depends_on)
3799 fd_ivopts_data = data;
3800 walk_tree (&expr, find_depends, depends_on, NULL);
3803 return force_expr_to_var_cost (expr, data->speed);
3806 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3807 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3808 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3809 invariants the computation depends on. */
3811 static comp_cost
3812 split_address_cost (struct ivopts_data *data,
3813 tree addr, bool *symbol_present, bool *var_present,
3814 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3816 tree core;
3817 HOST_WIDE_INT bitsize;
3818 HOST_WIDE_INT bitpos;
3819 tree toffset;
3820 enum machine_mode mode;
3821 int unsignedp, volatilep;
3823 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3824 &unsignedp, &volatilep, false);
3826 if (toffset != 0
3827 || bitpos % BITS_PER_UNIT != 0
3828 || TREE_CODE (core) != VAR_DECL)
3830 *symbol_present = false;
3831 *var_present = true;
3832 fd_ivopts_data = data;
3833 walk_tree (&addr, find_depends, depends_on, NULL);
3834 return new_cost (target_spill_cost[data->speed], 0);
3837 *offset += bitpos / BITS_PER_UNIT;
3838 if (TREE_STATIC (core)
3839 || DECL_EXTERNAL (core))
3841 *symbol_present = true;
3842 *var_present = false;
3843 return no_cost;
3846 *symbol_present = false;
3847 *var_present = true;
3848 return no_cost;
3851 /* Estimates cost of expressing difference of addresses E1 - E2 as
3852 var + symbol + offset. The value of offset is added to OFFSET,
3853 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3854 part is missing. DEPENDS_ON is a set of the invariants the computation
3855 depends on. */
3857 static comp_cost
3858 ptr_difference_cost (struct ivopts_data *data,
3859 tree e1, tree e2, bool *symbol_present, bool *var_present,
3860 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3862 HOST_WIDE_INT diff = 0;
3863 aff_tree aff_e1, aff_e2;
3864 tree type;
3866 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3868 if (ptr_difference_const (e1, e2, &diff))
3870 *offset += diff;
3871 *symbol_present = false;
3872 *var_present = false;
3873 return no_cost;
3876 if (integer_zerop (e2))
3877 return split_address_cost (data, TREE_OPERAND (e1, 0),
3878 symbol_present, var_present, offset, depends_on);
3880 *symbol_present = false;
3881 *var_present = true;
3883 type = signed_type_for (TREE_TYPE (e1));
3884 tree_to_aff_combination (e1, type, &aff_e1);
3885 tree_to_aff_combination (e2, type, &aff_e2);
3886 aff_combination_scale (&aff_e2, -1);
3887 aff_combination_add (&aff_e1, &aff_e2);
3889 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3892 /* Estimates cost of expressing difference E1 - E2 as
3893 var + symbol + offset. The value of offset is added to OFFSET,
3894 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3895 part is missing. DEPENDS_ON is a set of the invariants the computation
3896 depends on. */
3898 static comp_cost
3899 difference_cost (struct ivopts_data *data,
3900 tree e1, tree e2, bool *symbol_present, bool *var_present,
3901 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3903 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3904 unsigned HOST_WIDE_INT off1, off2;
3905 aff_tree aff_e1, aff_e2;
3906 tree type;
3908 e1 = strip_offset (e1, &off1);
3909 e2 = strip_offset (e2, &off2);
3910 *offset += off1 - off2;
3912 STRIP_NOPS (e1);
3913 STRIP_NOPS (e2);
3915 if (TREE_CODE (e1) == ADDR_EXPR)
3916 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3917 offset, depends_on);
3918 *symbol_present = false;
3920 if (operand_equal_p (e1, e2, 0))
3922 *var_present = false;
3923 return no_cost;
3926 *var_present = true;
3928 if (integer_zerop (e2))
3929 return force_var_cost (data, e1, depends_on);
3931 if (integer_zerop (e1))
3933 comp_cost cost = force_var_cost (data, e2, depends_on);
3934 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3935 return cost;
3938 type = signed_type_for (TREE_TYPE (e1));
3939 tree_to_aff_combination (e1, type, &aff_e1);
3940 tree_to_aff_combination (e2, type, &aff_e2);
3941 aff_combination_scale (&aff_e2, -1);
3942 aff_combination_add (&aff_e1, &aff_e2);
3944 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3947 /* Returns true if AFF1 and AFF2 are identical. */
3949 static bool
3950 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3952 unsigned i;
3954 if (aff1->n != aff2->n)
3955 return false;
3957 for (i = 0; i < aff1->n; i++)
3959 if (aff1->elts[i].coef != aff2->elts[i].coef)
3960 return false;
3962 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3963 return false;
3965 return true;
3968 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3970 static int
3971 get_expr_id (struct ivopts_data *data, tree expr)
3973 struct iv_inv_expr_ent ent;
3974 struct iv_inv_expr_ent **slot;
3976 ent.expr = expr;
3977 ent.hash = iterative_hash_expr (expr, 0);
3978 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3979 if (*slot)
3980 return (*slot)->id;
3982 *slot = XNEW (struct iv_inv_expr_ent);
3983 (*slot)->expr = expr;
3984 (*slot)->hash = ent.hash;
3985 (*slot)->id = data->inv_expr_id++;
3986 return (*slot)->id;
3989 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3990 requires a new compiler generated temporary. Returns -1 otherwise.
3991 ADDRESS_P is a flag indicating if the expression is for address
3992 computation. */
3994 static int
3995 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3996 tree cbase, HOST_WIDE_INT ratio,
3997 bool address_p)
3999 aff_tree ubase_aff, cbase_aff;
4000 tree expr, ub, cb;
4002 STRIP_NOPS (ubase);
4003 STRIP_NOPS (cbase);
4004 ub = ubase;
4005 cb = cbase;
4007 if ((TREE_CODE (ubase) == INTEGER_CST)
4008 && (TREE_CODE (cbase) == INTEGER_CST))
4009 return -1;
4011 /* Strips the constant part. */
4012 if (TREE_CODE (ubase) == PLUS_EXPR
4013 || TREE_CODE (ubase) == MINUS_EXPR
4014 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4016 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4017 ubase = TREE_OPERAND (ubase, 0);
4020 /* Strips the constant part. */
4021 if (TREE_CODE (cbase) == PLUS_EXPR
4022 || TREE_CODE (cbase) == MINUS_EXPR
4023 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4025 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4026 cbase = TREE_OPERAND (cbase, 0);
4029 if (address_p)
4031 if (((TREE_CODE (ubase) == SSA_NAME)
4032 || (TREE_CODE (ubase) == ADDR_EXPR
4033 && is_gimple_min_invariant (ubase)))
4034 && (TREE_CODE (cbase) == INTEGER_CST))
4035 return -1;
4037 if (((TREE_CODE (cbase) == SSA_NAME)
4038 || (TREE_CODE (cbase) == ADDR_EXPR
4039 && is_gimple_min_invariant (cbase)))
4040 && (TREE_CODE (ubase) == INTEGER_CST))
4041 return -1;
4044 if (ratio == 1)
4046 if (operand_equal_p (ubase, cbase, 0))
4047 return -1;
4049 if (TREE_CODE (ubase) == ADDR_EXPR
4050 && TREE_CODE (cbase) == ADDR_EXPR)
4052 tree usym, csym;
4054 usym = TREE_OPERAND (ubase, 0);
4055 csym = TREE_OPERAND (cbase, 0);
4056 if (TREE_CODE (usym) == ARRAY_REF)
4058 tree ind = TREE_OPERAND (usym, 1);
4059 if (TREE_CODE (ind) == INTEGER_CST
4060 && tree_fits_shwi_p (ind)
4061 && tree_to_shwi (ind) == 0)
4062 usym = TREE_OPERAND (usym, 0);
4064 if (TREE_CODE (csym) == ARRAY_REF)
4066 tree ind = TREE_OPERAND (csym, 1);
4067 if (TREE_CODE (ind) == INTEGER_CST
4068 && tree_fits_shwi_p (ind)
4069 && tree_to_shwi (ind) == 0)
4070 csym = TREE_OPERAND (csym, 0);
4072 if (operand_equal_p (usym, csym, 0))
4073 return -1;
4075 /* Now do a more complex comparison. */
4076 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4077 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4078 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4079 return -1;
4082 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4083 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4085 aff_combination_scale (&cbase_aff, -1 * ratio);
4086 aff_combination_add (&ubase_aff, &cbase_aff);
4087 expr = aff_combination_to_tree (&ubase_aff);
4088 return get_expr_id (data, expr);
4093 /* Determines the cost of the computation by which USE is expressed
4094 from induction variable CAND. If ADDRESS_P is true, we just need
4095 to create an address from it, otherwise we want to get it into a
4096 register. A set of invariants we depend on is stored in
4097 DEPENDS_ON. AT is the statement at which the value is computed.
4098 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4099 addressing is likely. */
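/* The cost is obtained by decomposing the computation into the shape

     symbol + var1 + const + ratio * var2

   and pricing the parts separately; see the individual cases below.  */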
4101 static comp_cost
4102 get_computation_cost_at (struct ivopts_data *data,
4103 struct iv_use *use, struct iv_cand *cand,
4104 bool address_p, bitmap *depends_on, gimple at,
4105 bool *can_autoinc,
4106 int *inv_expr_id)
4108 tree ubase = use->iv->base, ustep = use->iv->step;
4109 tree cbase, cstep;
4110 tree utype = TREE_TYPE (ubase), ctype;
4111 unsigned HOST_WIDE_INT cstepi, offset = 0;
4112 HOST_WIDE_INT ratio, aratio;
4113 bool var_present, symbol_present, stmt_is_after_inc;
4114 comp_cost cost;
4115 widest_int rat;
4116 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4117 enum machine_mode mem_mode = (address_p
4118 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4119 : VOIDmode);
4121 *depends_on = NULL;
4123 /* Only consider real candidates. */
4124 if (!cand->iv)
4125 return infinite_cost;
4127 cbase = cand->iv->base;
4128 cstep = cand->iv->step;
4129 ctype = TREE_TYPE (cbase);
4131 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4133 /* We do not have a precision to express the values of the use. */
4134 return infinite_cost;
4137 if (address_p
4138 || (use->iv->base_object
4139 && cand->iv->base_object
4140 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4141 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4143 /* Do not try to express address of an object with computation based
4144 on address of a different object. This may cause problems in rtl
4145 level alias analysis (that does not expect this to be happening,
4146 as this is illegal in C), and would be unlikely to be useful
4147 anyway. */
4148 if (use->iv->base_object
4149 && cand->iv->base_object
4150 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4151 return infinite_cost;
4154 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4156 /* TODO -- add direct handling of this case. */
4157 goto fallback;
4160 /* CSTEPI is removed from the offset in case the statement is after the
4161 increment. If the step is not constant, we use zero instead.
4162 This is a bit imprecise (there is the extra addition), but
4163 redundancy elimination is likely to transform the code so that
4164 it uses the value of the variable before the increment anyway,
4165 so this is not that unrealistic. */
4166 if (cst_and_fits_in_hwi (cstep))
4167 cstepi = int_cst_value (cstep);
4168 else
4169 cstepi = 0;
4171 if (!constant_multiple_of (ustep, cstep, &rat))
4172 return infinite_cost;
4174 if (wi::fits_shwi_p (rat))
4175 ratio = rat.to_shwi ();
4176 else
4177 return infinite_cost;
4179 STRIP_NOPS (cbase);
4180 ctype = TREE_TYPE (cbase);
4182 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4184 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4185 or ratio == 1, it is better to handle this like
4187 ubase - ratio * cbase + ratio * var
4189 (this also holds for ratio == -1, TODO). */
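/* For instance, with ubase = 16, cbase = 4 and ratio = 2, the use value
   16 + 2 * (var - 4) becomes the loop-invariant part 16 - 2 * 4 = 8 plus
   2 * var, leaving only one multiplication and one addition per use. */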
4191 if (cst_and_fits_in_hwi (cbase))
4193 offset = - ratio * int_cst_value (cbase);
4194 cost = difference_cost (data,
4195 ubase, build_int_cst (utype, 0),
4196 &symbol_present, &var_present, &offset,
4197 depends_on);
4198 cost.cost /= avg_loop_niter (data->current_loop);
4200 else if (ratio == 1)
4202 tree real_cbase = cbase;
4204 /* Check to see if any adjustment is needed. */
4205 if (cstepi == 0 && stmt_is_after_inc)
4207 aff_tree real_cbase_aff;
4208 aff_tree cstep_aff;
4210 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4211 &real_cbase_aff);
4212 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4214 aff_combination_add (&real_cbase_aff, &cstep_aff);
4215 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4218 cost = difference_cost (data,
4219 ubase, real_cbase,
4220 &symbol_present, &var_present, &offset,
4221 depends_on);
4222 cost.cost /= avg_loop_niter (data->current_loop);
4224 else if (address_p
4225 && !POINTER_TYPE_P (ctype)
4226 && multiplier_allowed_in_address_p
4227 (ratio, mem_mode,
4228 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4230 cbase
4231 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4232 cost = difference_cost (data,
4233 ubase, cbase,
4234 &symbol_present, &var_present, &offset,
4235 depends_on);
4236 cost.cost /= avg_loop_niter (data->current_loop);
4238 else
4240 cost = force_var_cost (data, cbase, depends_on);
4241 cost = add_costs (cost,
4242 difference_cost (data,
4243 ubase, build_int_cst (utype, 0),
4244 &symbol_present, &var_present,
4245 &offset, depends_on));
4246 cost.cost /= avg_loop_niter (data->current_loop);
4247 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4250 if (inv_expr_id)
4252 *inv_expr_id =
4253 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4254 /* Clear depends on. */
4255 if (*inv_expr_id != -1 && depends_on && *depends_on)
4256 bitmap_clear (*depends_on);
4259 /* If we are after the increment, the value of the candidate is higher by
4260 one iteration. */
4261 if (stmt_is_after_inc)
4262 offset -= ratio * cstepi;
4264 /* Now the computation is in the shape symbol + var1 + const + ratio * var2
4265 (the symbol/var1/const parts may be omitted). If we are looking for an
4266 address, find the cost of addressing this. */
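/* E.g. a use of the shape &sym + i + 4 + 2 * j has the symbol part &sym,
   invariant part i, constant offset 4 and ratio 2; whether that whole
   combination forms a single valid address is a target property, so the
   address cost is queried separately below. */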
4267 if (address_p)
4268 return add_costs (cost,
4269 get_address_cost (symbol_present, var_present,
4270 offset, ratio, cstepi,
4271 mem_mode,
4272 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4273 speed, stmt_is_after_inc,
4274 can_autoinc));
4276 /* Otherwise estimate the costs for computing the expression. */
4277 if (!symbol_present && !var_present && !offset)
4279 if (ratio != 1)
4280 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4281 return cost;
4284 /* Symbol + offset should be compile-time computable so consider that they
4285 are added once to the variable, if present. */
4286 if (var_present && (symbol_present || offset))
4287 cost.cost += adjust_setup_cost (data,
4288 add_cost (speed, TYPE_MODE (ctype)));
4290 /* Having offset does not affect runtime cost in case it is added to
4291 symbol, but it increases complexity. */
4292 if (offset)
4293 cost.complexity++;
4295 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4297 aratio = ratio > 0 ? ratio : -ratio;
4298 if (aratio != 1)
4299 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4300 return cost;
4302 fallback:
4303 if (can_autoinc)
4304 *can_autoinc = false;
4307 /* Just get the expression, expand it and measure the cost. */
4308 tree comp = get_computation_at (data->current_loop, use, cand, at);
4310 if (!comp)
4311 return infinite_cost;
4313 if (address_p)
4314 comp = build_simple_mem_ref (comp);
4316 return new_cost (computation_cost (comp, speed), 0);
4320 /* Determines the cost of the computation by which USE is expressed
4321 from induction variable CAND. If ADDRESS_P is true, we just need
4322 to create an address from it, otherwise we want to get it into a
4323 register. A set of invariants we depend on is stored in
4324 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4325 autoinc addressing is likely. */
4327 static comp_cost
4328 get_computation_cost (struct ivopts_data *data,
4329 struct iv_use *use, struct iv_cand *cand,
4330 bool address_p, bitmap *depends_on,
4331 bool *can_autoinc, int *inv_expr_id)
4333 return get_computation_cost_at (data,
4334 use, cand, address_p, depends_on, use->stmt,
4335 can_autoinc, inv_expr_id);
4338 /* Determines cost of basing replacement of USE on CAND in a generic
4339 expression. */
4341 static bool
4342 determine_use_iv_cost_generic (struct ivopts_data *data,
4343 struct iv_use *use, struct iv_cand *cand)
4345 bitmap depends_on;
4346 comp_cost cost;
4347 int inv_expr_id = -1;
4349 /* The simple case first -- if we need to express value of the preserved
4350 original biv, the cost is 0. This also prevents us from counting the
4351 cost of increment twice -- once at this use and once in the cost of
4352 the candidate. */
4353 if (cand->pos == IP_ORIGINAL
4354 && cand->incremented_at == use->stmt)
4356 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4357 ERROR_MARK, -1);
4358 return true;
4361 cost = get_computation_cost (data, use, cand, false, &depends_on,
4362 NULL, &inv_expr_id);
4364 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4365 inv_expr_id);
4367 return !infinite_cost_p (cost);
4370 /* Determines cost of basing replacement of USE on CAND in an address. */
4372 static bool
4373 determine_use_iv_cost_address (struct ivopts_data *data,
4374 struct iv_use *use, struct iv_cand *cand)
4376 bitmap depends_on;
4377 bool can_autoinc;
4378 int inv_expr_id = -1;
4379 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4380 &can_autoinc, &inv_expr_id);
4382 if (cand->ainc_use == use)
4384 if (can_autoinc)
4385 cost.cost -= cand->cost_step;
4386 /* If we generated the candidate solely for exploiting autoincrement
4387 opportunities, and it turns out it can't be used, set the cost to
4388 infinity to make sure we ignore it. */
4389 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4390 cost = infinite_cost;
4392 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4393 inv_expr_id);
4395 return !infinite_cost_p (cost);
4398 /* Computes the value of candidate CAND at position AT in iteration NITER, and
4399 stores it in VAL. */
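/* For instance, a candidate with base 0 and step 4 has the value
   0 + 10 * 4 = 40 for NITER = 10, or 0 + (10 + 1) * 4 = 44 when AT is
   already past the increment of the candidate. */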
4401 static void
4402 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4403 aff_tree *val)
4405 aff_tree step, delta, nit;
4406 struct iv *iv = cand->iv;
4407 tree type = TREE_TYPE (iv->base);
4408 tree steptype = unsigned_type_for (type);
4413 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4414 aff_combination_convert (&step, steptype);
4415 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4416 aff_combination_convert (&nit, steptype);
4417 aff_combination_mult (&nit, &step, &delta);
4418 if (stmt_after_increment (loop, cand, at))
4419 aff_combination_add (&delta, &step);
4421 tree_to_aff_combination (iv->base, type, val);
4422 if (!POINTER_TYPE_P (type))
4423 aff_combination_convert (val, steptype);
4424 aff_combination_add (val, &delta);
4427 /* Returns the period of induction variable IV. */
4429 static tree
4430 iv_period (struct iv *iv)
4432 tree step = iv->step, period, type;
4433 tree pow2div;
4435 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4437 type = unsigned_type_for (TREE_TYPE (step));
4438 /* The period of the iv is lcm (step, type_range) / step - 1,
4439 i.e., N * type_range / step - 1, where N = step >> num_ending_zeros (step).
4440 Since type_range is a power of two, the final result is
4443 (type_range >> num_ending_zeros (step)) - 1. */
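/* For instance, for step = 4 in a 32-bit unsigned type, num_ending_zeros
   is 2 and the period is (2^32 >> 2) - 1 = 0x3fffffff: that many
   increments are guaranteed not to wrap the iv back to an already
   visited value. */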
4446 pow2div = num_ending_zeros (step);
4448 period = build_low_bits_mask (type,
4449 (TYPE_PRECISION (type)
4450 - tree_to_uhwi (pow2div)));
4452 return period;
4455 /* Returns the comparison operator used when eliminating the iv USE. */
4457 static enum tree_code
4458 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4460 struct loop *loop = data->current_loop;
4461 basic_block ex_bb;
4462 edge exit;
4464 ex_bb = gimple_bb (use->stmt);
4465 exit = EDGE_SUCC (ex_bb, 0);
4466 if (flow_bb_inside_loop_p (loop, exit->dest))
4467 exit = EDGE_SUCC (ex_bb, 1);
4469 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4472 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4473 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4474 calculation is performed in a non-wrapping type.
4476 TODO: More generally, we could test for the situation that
4477 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4478 This would require knowing the sign of OFFSET. */
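/* A minimal instance of the pattern matched below: for BASE defined as
   p_0 = base + a and OFFSET equal to a (in a non-wrapping type),
   BASE - OFFSET is just base, so the subtraction cannot overflow. */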
4480 static bool
4481 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4483 enum tree_code code;
4484 tree e1, e2;
4485 aff_tree aff_e1, aff_e2, aff_offset;
4487 if (!nowrap_type_p (TREE_TYPE (base)))
4488 return false;
4490 base = expand_simple_operations (base);
4492 if (TREE_CODE (base) == SSA_NAME)
4494 gimple stmt = SSA_NAME_DEF_STMT (base);
4496 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4497 return false;
4499 code = gimple_assign_rhs_code (stmt);
4500 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4501 return false;
4503 e1 = gimple_assign_rhs1 (stmt);
4504 e2 = gimple_assign_rhs2 (stmt);
4506 else
4508 code = TREE_CODE (base);
4509 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4510 return false;
4511 e1 = TREE_OPERAND (base, 0);
4512 e2 = TREE_OPERAND (base, 1);
4515 /* Use affine expansion for a deeper inspection to prove the equality. */
4516 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4517 &aff_e2, &data->name_expansion_cache);
4518 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4519 &aff_offset, &data->name_expansion_cache);
4520 aff_combination_scale (&aff_offset, -1);
4521 switch (code)
4523 case PLUS_EXPR:
4524 aff_combination_add (&aff_e2, &aff_offset);
4525 if (aff_combination_zero_p (&aff_e2))
4526 return true;
4528 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4529 &aff_e1, &data->name_expansion_cache);
4530 aff_combination_add (&aff_e1, &aff_offset);
4531 return aff_combination_zero_p (&aff_e1);
4533 case POINTER_PLUS_EXPR:
4534 aff_combination_add (&aff_e2, &aff_offset);
4535 return aff_combination_zero_p (&aff_e2);
4537 default:
4538 return false;
4542 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4543 comparison with CAND. NITER describes the number of iterations of
4544 the loops. If successful, the comparison in COMP_P is altered accordingly.
4546 We aim to handle the following situation:
4548 sometype *base, *p;
4549 int a, b, i;
4551 i = a;
4552 p = p_0 = base + a;
4554 do
4555 {
4556 bla (*p);
4557 p++;
4558 i++;
4559 }
4560 while (i < b);
4562 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4563 We aim to optimize this to
4565 p = p_0 = base + a;
4566 do
4567 {
4568 bla (*p);
4569 p++;
4570 }
4571 while (p < p_0 - a + b);
4573 This preserves correctness, since the pointer arithmetic does not
4574 overflow. More precisely:
4576 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4577 overflow in computing it or the values of p.
4578 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4579 overflow. To prove this, we use the fact that p_0 = base + a. */
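/* Concretely, with a = 2 and b = 5 the original loop body runs for
   i = 2, 3, 4, and the rewritten exit test p < p_0 - 2 + 5 = base + 5
   likewise lets the body run for p = base + 2, base + 3 and base + 4. */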
4581 static bool
4582 iv_elimination_compare_lt (struct ivopts_data *data,
4583 struct iv_cand *cand, enum tree_code *comp_p,
4584 struct tree_niter_desc *niter)
4586 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4587 aff_tree nit, tmpa, tmpb;
4588 enum tree_code comp;
4589 HOST_WIDE_INT step;
4591 /* We need to know that the candidate induction variable does not overflow.
4592 While more complex analysis may be used to prove this, for now just
4593 check that the variable appears in the original program and that it
4594 is computed in a type that guarantees no overflows. */
4595 cand_type = TREE_TYPE (cand->iv->base);
4596 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4597 return false;
4599 /* Make sure that the loop iterates until the loop bound is hit, as otherwise
4600 the calculation of the BOUND could overflow, making the comparison
4601 invalid. */
4602 if (!data->loop_single_exit_p)
4603 return false;
4605 /* We need to be able to decide whether the candidate is increasing or
4606 decreasing in order to choose the right comparison operator. */
4607 if (!cst_and_fits_in_hwi (cand->iv->step))
4608 return false;
4609 step = int_cst_value (cand->iv->step);
4611 /* Check that the number of iterations matches the expected pattern:
4612 a + 1 > b ? 0 : b - a - 1. */
4613 mbz = niter->may_be_zero;
4614 if (TREE_CODE (mbz) == GT_EXPR)
4616 /* Handle a + 1 > b. */
4617 tree op0 = TREE_OPERAND (mbz, 0);
4618 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4620 a = TREE_OPERAND (op0, 0);
4621 b = TREE_OPERAND (mbz, 1);
4623 else
4624 return false;
4626 else if (TREE_CODE (mbz) == LT_EXPR)
4628 tree op1 = TREE_OPERAND (mbz, 1);
4630 /* Handle b < a + 1. */
4631 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4633 a = TREE_OPERAND (op1, 0);
4634 b = TREE_OPERAND (mbz, 0);
4636 else
4637 return false;
4639 else
4640 return false;
4642 /* Expected number of iterations is B - A - 1. Check that it matches
4643 the actual number, i.e., that B - A - NITER = 1. */
4644 tree_to_aff_combination (niter->niter, nit_type, &nit);
4645 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4646 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4647 aff_combination_scale (&nit, -1);
4648 aff_combination_scale (&tmpa, -1);
4649 aff_combination_add (&tmpb, &tmpa);
4650 aff_combination_add (&tmpb, &nit);
4651 if (tmpb.n != 0 || tmpb.offset != 1)
4652 return false;
4654 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4655 overflow. */
4656 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4657 cand->iv->step,
4658 fold_convert (TREE_TYPE (cand->iv->step), a));
4659 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4660 return false;
4662 /* Determine the new comparison operator. */
4663 comp = step < 0 ? GT_EXPR : LT_EXPR;
4664 if (*comp_p == NE_EXPR)
4665 *comp_p = comp;
4666 else if (*comp_p == EQ_EXPR)
4667 *comp_p = invert_tree_comparison (comp, false);
4668 else
4669 gcc_unreachable ();
4671 return true;
4674 /* Check whether it is possible to express the condition in USE by comparison
4675 of candidate CAND. If so, store the value compared with to BOUND, and the
4676 comparison operator to COMP. */
4678 static bool
4679 may_eliminate_iv (struct ivopts_data *data,
4680 struct iv_use *use, struct iv_cand *cand, tree *bound,
4681 enum tree_code *comp)
4683 basic_block ex_bb;
4684 edge exit;
4685 tree period;
4686 struct loop *loop = data->current_loop;
4687 aff_tree bnd;
4688 struct tree_niter_desc *desc = NULL;
4690 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4691 return false;
4693 /* For now works only for exits that dominate the loop latch.
4694 TODO: extend to other conditions inside loop body. */
4695 ex_bb = gimple_bb (use->stmt);
4696 if (use->stmt != last_stmt (ex_bb)
4697 || gimple_code (use->stmt) != GIMPLE_COND
4698 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4699 return false;
4701 exit = EDGE_SUCC (ex_bb, 0);
4702 if (flow_bb_inside_loop_p (loop, exit->dest))
4703 exit = EDGE_SUCC (ex_bb, 1);
4704 if (flow_bb_inside_loop_p (loop, exit->dest))
4705 return false;
4707 desc = niter_for_exit (data, exit);
4708 if (!desc)
4709 return false;
4711 /* Determine whether we can use the variable to test the exit condition.
4712 This is the case iff the period of the induction variable is greater
4713 than the number of iterations for which the exit condition is true. */
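/* E.g. a candidate with step 1 in a 32-bit unsigned type has period
   2^32 - 1, so whenever the number of iterations is known to be smaller
   than that, the candidate hits BOUND for the first time in the last
   iteration and the exit test may compare against BOUND directly. */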
4714 period = iv_period (cand->iv);
4716 /* If the number of iterations is constant, compare against it directly. */
4717 if (TREE_CODE (desc->niter) == INTEGER_CST)
4719 /* See cand_value_at. */
4720 if (stmt_after_increment (loop, cand, use->stmt))
4722 if (!tree_int_cst_lt (desc->niter, period))
4723 return false;
4725 else
4727 if (tree_int_cst_lt (period, desc->niter))
4728 return false;
4732 /* If not, and if this is the only possible exit of the loop, see whether
4733 we can get a conservative estimate on the number of iterations of the
4734 entire loop and compare against that instead. */
4735 else
4737 widest_int period_value, max_niter;
4739 max_niter = desc->max;
4740 if (stmt_after_increment (loop, cand, use->stmt))
4741 max_niter += 1;
4742 period_value = wi::to_widest (period);
4743 if (wi::gtu_p (max_niter, period_value))
4745 /* See if we can take advantage of inferred loop bound information. */
4746 if (data->loop_single_exit_p)
4748 if (!max_loop_iterations (loop, &max_niter))
4749 return false;
4750 /* The loop bound is already adjusted by adding 1. */
4751 if (wi::gtu_p (max_niter, period_value))
4752 return false;
4754 else
4755 return false;
4759 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4761 *bound = fold_convert (TREE_TYPE (cand->iv->base),
4762 aff_combination_to_tree (&bnd));
4763 *comp = iv_elimination_compare (data, use);
4765 /* It is unlikely that computing the number of iterations using division
4766 would be more profitable than keeping the original induction variable. */
4767 if (expression_expensive_p (*bound))
4768 return false;
4770 /* Sometimes it is possible to handle the situation that the number of
4771 iterations may be zero unless additional assumptions hold, by using <
4772 instead of != in the exit condition.
4774 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4775 base the exit condition on it. However, that is often too
4776 expensive. */
4777 if (!integer_zerop (desc->may_be_zero))
4778 return iv_elimination_compare_lt (data, cand, comp, desc);
4780 return true;
4783 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4784 be copied, if it is used in the loop body and DATA->body_includes_call. */
4786 static int
4787 parm_decl_cost (struct ivopts_data *data, tree bound)
4789 tree sbound = bound;
4790 STRIP_NOPS (sbound);
4792 if (TREE_CODE (sbound) == SSA_NAME
4793 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4794 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4795 && data->body_includes_call)
4796 return COSTS_N_INSNS (1);
4798 return 0;
4801 /* Determines cost of basing replacement of USE on CAND in a condition. */
4803 static bool
4804 determine_use_iv_cost_condition (struct ivopts_data *data,
4805 struct iv_use *use, struct iv_cand *cand)
4807 tree bound = NULL_TREE;
4808 struct iv *cmp_iv;
4809 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4810 comp_cost elim_cost, express_cost, cost, bound_cost;
4811 bool ok;
4812 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4813 tree *control_var, *bound_cst;
4814 enum tree_code comp = ERROR_MARK;
4816 /* Only consider real candidates. */
4817 if (!cand->iv)
4819 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4820 ERROR_MARK, -1);
4821 return false;
4824 /* Try iv elimination. */
4825 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4827 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4828 if (elim_cost.cost == 0)
4829 elim_cost.cost = parm_decl_cost (data, bound);
4830 else if (TREE_CODE (bound) == INTEGER_CST)
4831 elim_cost.cost = 0;
4832 /* If we replace a loop condition 'i < n' with 'p < base + n',
4833 depends_on_elim will have 'base' and 'n' set, which implies
4834 that both 'base' and 'n' will be live during the loop. More likely,
4835 'base + n' will be loop invariant, resulting in only one live value
4836 during the loop. So in that case we clear depends_on_elim and set
4837 elim_inv_expr_id instead. */
4838 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4840 elim_inv_expr_id = get_expr_id (data, bound);
4841 bitmap_clear (depends_on_elim);
4843 /* The bound is a loop invariant, so it will be computed
4844 only once. */
4845 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4847 else
4848 elim_cost = infinite_cost;
4850 /* Try expressing the original giv. If it is compared with an invariant,
4851 note that we cannot get rid of it. */
4852 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4853 NULL, &cmp_iv);
4854 gcc_assert (ok);
4856 /* When the condition is a comparison of the candidate IV against
4857 zero, prefer this IV.
4859 TODO: The constant that we're subtracting from the cost should
4860 be target-dependent. This information should be added to the
4861 target costs for each backend. */
4862 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4863 && integer_zerop (*bound_cst)
4864 && (operand_equal_p (*control_var, cand->var_after, 0)
4865 || operand_equal_p (*control_var, cand->var_before, 0)))
4866 elim_cost.cost -= 1;
4868 express_cost = get_computation_cost (data, use, cand, false,
4869 &depends_on_express, NULL,
4870 &express_inv_expr_id);
4871 fd_ivopts_data = data;
4872 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4874 /* Count the cost of the original bound as well. */
4875 bound_cost = force_var_cost (data, *bound_cst, NULL);
4876 if (bound_cost.cost == 0)
4877 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4878 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4879 bound_cost.cost = 0;
4880 express_cost.cost += bound_cost.cost;
4882 /* Choose the better approach, preferring the eliminated IV. */
4883 if (compare_costs (elim_cost, express_cost) <= 0)
4885 cost = elim_cost;
4886 depends_on = depends_on_elim;
4887 depends_on_elim = NULL;
4888 inv_expr_id = elim_inv_expr_id;
4890 else
4892 cost = express_cost;
4893 depends_on = depends_on_express;
4894 depends_on_express = NULL;
4895 bound = NULL_TREE;
4896 comp = ERROR_MARK;
4897 inv_expr_id = express_inv_expr_id;
4900 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4902 if (depends_on_elim)
4903 BITMAP_FREE (depends_on_elim);
4904 if (depends_on_express)
4905 BITMAP_FREE (depends_on_express);
4907 return !infinite_cost_p (cost);
4910 /* Determines cost of basing replacement of USE on CAND. Returns false
4911 if USE cannot be based on CAND. */
4913 static bool
4914 determine_use_iv_cost (struct ivopts_data *data,
4915 struct iv_use *use, struct iv_cand *cand)
4917 switch (use->type)
4919 case USE_NONLINEAR_EXPR:
4920 return determine_use_iv_cost_generic (data, use, cand);
4922 case USE_ADDRESS:
4923 return determine_use_iv_cost_address (data, use, cand);
4925 case USE_COMPARE:
4926 return determine_use_iv_cost_condition (data, use, cand);
4928 default:
4929 gcc_unreachable ();
4933 /* Return true if get_computation_cost indicates that autoincrement is
4934 a possibility for the pair of USE and CAND, false otherwise. */
4936 static bool
4937 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4938 struct iv_cand *cand)
4940 bitmap depends_on;
4941 bool can_autoinc;
4942 comp_cost cost;
4944 if (use->type != USE_ADDRESS)
4945 return false;
4947 cost = get_computation_cost (data, use, cand, true, &depends_on,
4948 &can_autoinc, NULL);
4950 BITMAP_FREE (depends_on);
4952 return !infinite_cost_p (cost) && can_autoinc;
4955 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4956 use that allows autoincrement, and set their AINC_USE if possible. */
4958 static void
4959 set_autoinc_for_original_candidates (struct ivopts_data *data)
4961 unsigned i, j;
4963 for (i = 0; i < n_iv_cands (data); i++)
4965 struct iv_cand *cand = iv_cand (data, i);
4966 struct iv_use *closest_before = NULL;
4967 struct iv_use *closest_after = NULL;
4968 if (cand->pos != IP_ORIGINAL)
4969 continue;
4971 for (j = 0; j < n_iv_uses (data); j++)
4973 struct iv_use *use = iv_use (data, j);
4974 unsigned uid = gimple_uid (use->stmt);
4976 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4977 continue;
4979 if (uid < gimple_uid (cand->incremented_at)
4980 && (closest_before == NULL
4981 || uid > gimple_uid (closest_before->stmt)))
4982 closest_before = use;
4984 if (uid > gimple_uid (cand->incremented_at)
4985 && (closest_after == NULL
4986 || uid < gimple_uid (closest_after->stmt)))
4987 closest_after = use;
4990 if (closest_before != NULL
4991 && autoinc_possible_for_pair (data, closest_before, cand))
4992 cand->ainc_use = closest_before;
4993 else if (closest_after != NULL
4994 && autoinc_possible_for_pair (data, closest_after, cand))
4995 cand->ainc_use = closest_after;
4999 /* Finds the candidates for the induction variables. */
5001 static void
5002 find_iv_candidates (struct ivopts_data *data)
5004 /* Add commonly used ivs. */
5005 add_standard_iv_candidates (data);
5007 /* Add old induction variables. */
5008 add_old_ivs_candidates (data);
5010 /* Add induction variables derived from uses. */
5011 add_derived_ivs_candidates (data);
5013 set_autoinc_for_original_candidates (data);
5015 /* Record the important candidates. */
5016 record_important_candidates (data);
5019 /* Determines costs of basing the use of the iv on an iv candidate. */
5021 static void
5022 determine_use_iv_costs (struct ivopts_data *data)
5024 unsigned i, j;
5025 struct iv_use *use;
5026 struct iv_cand *cand;
5027 bitmap to_clear = BITMAP_ALLOC (NULL);
5029 alloc_use_cost_map (data);
5031 for (i = 0; i < n_iv_uses (data); i++)
5033 use = iv_use (data, i);
5035 if (data->consider_all_candidates)
5037 for (j = 0; j < n_iv_cands (data); j++)
5039 cand = iv_cand (data, j);
5040 determine_use_iv_cost (data, use, cand);
5043 else
5045 bitmap_iterator bi;
5047 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5049 cand = iv_cand (data, j);
5050 if (!determine_use_iv_cost (data, use, cand))
5051 bitmap_set_bit (to_clear, j);
5054 /* Remove the candidates for which the cost is infinite from
5055 the list of related candidates. */
5056 bitmap_and_compl_into (use->related_cands, to_clear);
5057 bitmap_clear (to_clear);
5061 BITMAP_FREE (to_clear);
5063 if (dump_file && (dump_flags & TDF_DETAILS))
5065 fprintf (dump_file, "Use-candidate costs:\n");
5067 for (i = 0; i < n_iv_uses (data); i++)
5069 use = iv_use (data, i);
5071 fprintf (dump_file, "Use %d:\n", i);
5072 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5073 for (j = 0; j < use->n_map_members; j++)
5075 if (!use->cost_map[j].cand
5076 || infinite_cost_p (use->cost_map[j].cost))
5077 continue;
5079 fprintf (dump_file, " %d\t%d\t%d\t",
5080 use->cost_map[j].cand->id,
5081 use->cost_map[j].cost.cost,
5082 use->cost_map[j].cost.complexity);
5083 if (use->cost_map[j].depends_on)
5084 bitmap_print (dump_file,
5085 use->cost_map[j].depends_on, "","");
5086 if (use->cost_map[j].inv_expr_id != -1)
5087 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5088 fprintf (dump_file, "\n");
5091 fprintf (dump_file, "\n");
5093 fprintf (dump_file, "\n");
5097 /* Determines cost of the candidate CAND. */
5099 static void
5100 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5102 comp_cost cost_base;
5103 unsigned cost, cost_step;
5104 tree base;
5106 if (!cand->iv)
5108 cand->cost = 0;
5109 return;
5112 /* There are two costs associated with the candidate -- its increment
5113 and its initialization. The latter is almost negligible for any loop
5114 that rolls enough, so we take it into account only a little. */
5116 base = cand->iv->base;
5117 cost_base = force_var_cost (data, base, NULL);
5118 /* It will be exceptional that the iv register happens to be initialized with
5119 the proper value at no cost. In general, there will at least be a regcopy
5120 or a const set. */
5121 if (cost_base.cost == 0)
5122 cost_base.cost = COSTS_N_INSNS (1);
5123 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5125 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5127 /* Prefer the original ivs unless we may gain something by replacing them.
5128 The reason is to make debugging simpler; this is not relevant for
5129 artificial ivs created by other optimization passes. */
5130 if (cand->pos != IP_ORIGINAL
5131 || !SSA_NAME_VAR (cand->var_before)
5132 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5133 cost++;
5135 /* Prefer not to insert statements into latch unless there are some
5136 already (so that we do not create unnecessary jumps). */
5137 if (cand->pos == IP_END
5138 && empty_block_p (ip_end_pos (data->current_loop)))
5139 cost++;
5141 cand->cost = cost;
5142 cand->cost_step = cost_step;
5145 /* Determines costs of computation of the candidates. */
5147 static void
5148 determine_iv_costs (struct ivopts_data *data)
5150 unsigned i;
5152 if (dump_file && (dump_flags & TDF_DETAILS))
5154 fprintf (dump_file, "Candidate costs:\n");
5155 fprintf (dump_file, " cand\tcost\n");
5158 for (i = 0; i < n_iv_cands (data); i++)
5160 struct iv_cand *cand = iv_cand (data, i);
5162 determine_iv_cost (data, cand);
5164 if (dump_file && (dump_flags & TDF_DETAILS))
5165 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5168 if (dump_file && (dump_flags & TDF_DETAILS))
5169 fprintf (dump_file, "\n");
5172 /* Calculates cost for having SIZE induction variables. */
5174 static unsigned
5175 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5177 /* We add size to the cost, so that we prefer eliminating ivs
5178 if possible. */
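/* Thus a set of, say, 3 ivs is charged 3 plus whatever penalty
   estimate_reg_pressure_cost predicts for 3 more live registers on top of
   DATA->regs_used; the SIZE term added below is what breaks ties in favor
   of fewer induction variables. */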
5179 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5180 data->body_includes_call);
5183 /* For each size of the induction variable set determine the penalty. */
5185 static void
5186 determine_set_costs (struct ivopts_data *data)
5188 unsigned j, n;
5189 gimple_phi phi;
5190 gimple_phi_iterator psi;
5191 tree op;
5192 struct loop *loop = data->current_loop;
5193 bitmap_iterator bi;
5195 if (dump_file && (dump_flags & TDF_DETAILS))
5197 fprintf (dump_file, "Global costs:\n");
5198 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5199 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5200 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5201 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5204 n = 0;
5205 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5207 phi = psi.phi ();
5208 op = PHI_RESULT (phi);
5210 if (virtual_operand_p (op))
5211 continue;
5213 if (get_iv (data, op))
5214 continue;
5216 n++;
5219 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5221 struct version_info *info = ver_info (data, j);
5223 if (info->inv_id && info->has_nonlin_use)
5224 n++;
5227 data->regs_used = n;
5228 if (dump_file && (dump_flags & TDF_DETAILS))
5229 fprintf (dump_file, " regs_used %d\n", n);
5231 if (dump_file && (dump_flags & TDF_DETAILS))
5233 fprintf (dump_file, " cost for size:\n");
5234 fprintf (dump_file, " ivs\tcost\n");
5235 for (j = 0; j <= 2 * target_avail_regs; j++)
5236 fprintf (dump_file, " %d\t%d\n", j,
5237 ivopts_global_cost_for_size (data, j));
5238 fprintf (dump_file, "\n");
5242 /* Returns true if A is a cheaper cost pair than B. */
5244 static bool
5245 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5247 int cmp;
5249 if (!a)
5250 return false;
5252 if (!b)
5253 return true;
5255 cmp = compare_costs (a->cost, b->cost);
5256 if (cmp < 0)
5257 return true;
5259 if (cmp > 0)
5260 return false;
5262 /* In case the costs are the same, prefer the cheaper candidate. */
5263 if (a->cand->cost < b->cand->cost)
5264 return true;
5266 return false;
5270 /* Returns the candidate by which USE is expressed in IVS. */
5272 static struct cost_pair *
5273 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5275 return ivs->cand_for_use[use->id];
5278 /* Computes the cost field of the IVS structure. */
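/* I.e. the total is cand_use_cost + cand_cost
   + ivopts_global_cost_for_size (n_regs + num_used_inv_expr): the cost of
   expressing every use, the cost of maintaining the candidates themselves,
   and the register pressure of the candidates plus the live invariant
   expressions. */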
5280 static void
5281 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5283 comp_cost cost = ivs->cand_use_cost;
5285 cost.cost += ivs->cand_cost;
5287 cost.cost += ivopts_global_cost_for_size (data,
5288 ivs->n_regs + ivs->num_used_inv_expr);
5290 ivs->cost = cost;
5293 /* Remove invariants in set INVS from set IVS. */
5295 static void
5296 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5298 bitmap_iterator bi;
5299 unsigned iid;
5301 if (!invs)
5302 return;
5304 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5306 ivs->n_invariant_uses[iid]--;
5307 if (ivs->n_invariant_uses[iid] == 0)
5308 ivs->n_regs--;
5312 /* Set USE not to be expressed by any candidate in IVS. */
5314 static void
5315 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5316 struct iv_use *use)
5318 unsigned uid = use->id, cid;
5319 struct cost_pair *cp;
5321 cp = ivs->cand_for_use[uid];
5322 if (!cp)
5323 return;
5324 cid = cp->cand->id;
5326 ivs->bad_uses++;
5327 ivs->cand_for_use[uid] = NULL;
5328 ivs->n_cand_uses[cid]--;
5330 if (ivs->n_cand_uses[cid] == 0)
5332 bitmap_clear_bit (ivs->cands, cid);
5333 /* Do not count the pseudocandidates. */
5334 if (cp->cand->iv)
5335 ivs->n_regs--;
5336 ivs->n_cands--;
5337 ivs->cand_cost -= cp->cand->cost;
5339 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5342 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5344 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5346 if (cp->inv_expr_id != -1)
5348 ivs->used_inv_expr[cp->inv_expr_id]--;
5349 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5350 ivs->num_used_inv_expr--;
5352 iv_ca_recount_cost (data, ivs);
5355 /* Add invariants in set INVS to set IVS. */
5357 static void
5358 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5360 bitmap_iterator bi;
5361 unsigned iid;
5363 if (!invs)
5364 return;
5366 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5368 ivs->n_invariant_uses[iid]++;
5369 if (ivs->n_invariant_uses[iid] == 1)
5370 ivs->n_regs++;
5374 /* Set cost pair for USE in set IVS to CP. */
5376 static void
5377 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5378 struct iv_use *use, struct cost_pair *cp)
5380 unsigned uid = use->id, cid;
5382 if (ivs->cand_for_use[uid] == cp)
5383 return;
5385 if (ivs->cand_for_use[uid])
5386 iv_ca_set_no_cp (data, ivs, use);
5388 if (cp)
5390 cid = cp->cand->id;
5392 ivs->bad_uses--;
5393 ivs->cand_for_use[uid] = cp;
5394 ivs->n_cand_uses[cid]++;
5395 if (ivs->n_cand_uses[cid] == 1)
5397 bitmap_set_bit (ivs->cands, cid);
5398 /* Do not count the pseudocandidates. */
5399 if (cp->cand->iv)
5400 ivs->n_regs++;
5401 ivs->n_cands++;
5402 ivs->cand_cost += cp->cand->cost;
5404 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5407 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5408 iv_ca_set_add_invariants (ivs, cp->depends_on);
5410 if (cp->inv_expr_id != -1)
5412 ivs->used_inv_expr[cp->inv_expr_id]++;
5413 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5414 ivs->num_used_inv_expr++;
5416 iv_ca_recount_cost (data, ivs);
5420 /* Extend set IVS by expressing USE by some of the candidates in it
5421 if possible. Consider all important candidates if candidates in
5422 set IVS don't give any result. */
5424 static void
5425 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5426 struct iv_use *use)
5428 struct cost_pair *best_cp = NULL, *cp;
5429 bitmap_iterator bi;
5430 unsigned i;
5431 struct iv_cand *cand;
5433 gcc_assert (ivs->upto >= use->id);
5434 ivs->upto++;
5435 ivs->bad_uses++;
5437 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5439 cand = iv_cand (data, i);
5440 cp = get_use_iv_cost (data, use, cand);
5441 if (cheaper_cost_pair (cp, best_cp))
5442 best_cp = cp;
5445 if (best_cp == NULL)
5447 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5449 cand = iv_cand (data, i);
5450 cp = get_use_iv_cost (data, use, cand);
5451 if (cheaper_cost_pair (cp, best_cp))
5452 best_cp = cp;
5456 iv_ca_set_cp (data, ivs, use, best_cp);
5459 /* Get cost for assignment IVS. */
5461 static comp_cost
5462 iv_ca_cost (struct iv_ca *ivs)
5464 /* This was a conditional expression but it triggered a bug in
5465 Sun C 5.5. */
5466 if (ivs->bad_uses)
5467 return infinite_cost;
5468 else
5469 return ivs->cost;
5472 /* Returns true if all dependences of CP are among invariants in IVS. */
5474 static bool
5475 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5477 unsigned i;
5478 bitmap_iterator bi;
5480 if (!cp->depends_on)
5481 return true;
5483 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5485 if (ivs->n_invariant_uses[i] == 0)
5486 return false;
5489 return true;
5492 /* Creates a change record expressing USE by NEW_CP instead of OLD_CP, and
5493 chains it before NEXT_CHANGE. */
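/* The deltas therefore form a LIFO chain with the most recent change at
   its head, which is why iv_ca_delta_commit walks the list as-is to apply
   it and reverses it first in order to undo it. */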
5495 static struct iv_ca_delta *
5496 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5497 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5499 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5501 change->use = use;
5502 change->old_cp = old_cp;
5503 change->new_cp = new_cp;
5504 change->next_change = next_change;
5506 return change;
5509 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5510 are rewritten. */
5512 static struct iv_ca_delta *
5513 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5515 struct iv_ca_delta *last;
5517 if (!l2)
5518 return l1;
5520 if (!l1)
5521 return l2;
5523 for (last = l1; last->next_change; last = last->next_change)
5524 continue;
5525 last->next_change = l2;
5527 return l1;
5530 /* Reverse the list of changes DELTA, forming the inverse to it. */
5532 static struct iv_ca_delta *
5533 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5535 struct iv_ca_delta *act, *next, *prev = NULL;
5536 struct cost_pair *tmp;
5538 for (act = delta; act; act = next)
5540 next = act->next_change;
5541 act->next_change = prev;
5542 prev = act;
5544 tmp = act->old_cp;
5545 act->old_cp = act->new_cp;
5546 act->new_cp = tmp;
5549 return prev;
5552 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5553 reverted instead. */
5555 static void
5556 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5557 struct iv_ca_delta *delta, bool forward)
5559 struct cost_pair *from, *to;
5560 struct iv_ca_delta *act;
5562 if (!forward)
5563 delta = iv_ca_delta_reverse (delta);
5565 for (act = delta; act; act = act->next_change)
5567 from = act->old_cp;
5568 to = act->new_cp;
5569 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5570 iv_ca_set_cp (data, ivs, act->use, to);
5573 if (!forward)
5574 iv_ca_delta_reverse (delta);
5577 /* Returns true if CAND is used in IVS. */
5579 static bool
5580 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5582 return ivs->n_cand_uses[cand->id] > 0;
5585 /* Returns number of induction variable candidates in the set IVS. */
5587 static unsigned
5588 iv_ca_n_cands (struct iv_ca *ivs)
5590 return ivs->n_cands;
5593 /* Free the list of changes DELTA. */
5595 static void
5596 iv_ca_delta_free (struct iv_ca_delta **delta)
5598 struct iv_ca_delta *act, *next;
5600 for (act = *delta; act; act = next)
5602 next = act->next_change;
5603 free (act);
5606 *delta = NULL;
5609 /* Allocates a new iv candidate assignment. */
5611 static struct iv_ca *
5612 iv_ca_new (struct ivopts_data *data)
5614 struct iv_ca *nw = XNEW (struct iv_ca);
5616 nw->upto = 0;
5617 nw->bad_uses = 0;
5618 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5619 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5620 nw->cands = BITMAP_ALLOC (NULL);
5621 nw->n_cands = 0;
5622 nw->n_regs = 0;
5623 nw->cand_use_cost = no_cost;
5624 nw->cand_cost = 0;
5625 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5626 nw->cost = no_cost;
5627 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5628 nw->num_used_inv_expr = 0;
5630 return nw;
5633 /* Free memory occupied by the set IVS. */
5635 static void
5636 iv_ca_free (struct iv_ca **ivs)
5638 free ((*ivs)->cand_for_use);
5639 free ((*ivs)->n_cand_uses);
5640 BITMAP_FREE ((*ivs)->cands);
5641 free ((*ivs)->n_invariant_uses);
5642 free ((*ivs)->used_inv_expr);
5643 free (*ivs);
5644 *ivs = NULL;
5647 /* Dumps IVS to FILE. */
5649 static void
5650 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5652 const char *pref = " invariants ";
5653 unsigned i;
5654 comp_cost cost = iv_ca_cost (ivs);
5656 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5657 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5658 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5659 bitmap_print (file, ivs->cands, " candidates: ","\n");
5661 for (i = 0; i < ivs->upto; i++)
5663 struct iv_use *use = iv_use (data, i);
5664 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5665 if (cp)
5666 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5667 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5668 else
5669 fprintf (file, " use:%d --> ??\n", use->id);
5672 for (i = 1; i <= data->max_inv_id; i++)
5673 if (ivs->n_invariant_uses[i])
5675 fprintf (file, "%s%d", pref, i);
5676 pref = ", ";
5678 fprintf (file, "\n\n");
5681 /* Try changing the candidate in IVS to CAND for each use. Return the cost
5682 of the new set, and store the differences in DELTA. The number of induction
5683 variables in the new set is stored to N_IVS. MIN_NCAND is a flag; when it is
5684 true, the function tries to find a solution with a minimal number of iv candidates. */
5686 static comp_cost
5687 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5688 struct iv_cand *cand, struct iv_ca_delta **delta,
5689 unsigned *n_ivs, bool min_ncand)
5691 unsigned i;
5692 comp_cost cost;
5693 struct iv_use *use;
5694 struct cost_pair *old_cp, *new_cp;
5696 *delta = NULL;
5697 for (i = 0; i < ivs->upto; i++)
5699 use = iv_use (data, i);
5700 old_cp = iv_ca_cand_for_use (ivs, use);
5702 if (old_cp
5703 && old_cp->cand == cand)
5704 continue;
5706 new_cp = get_use_iv_cost (data, use, cand);
5707 if (!new_cp)
5708 continue;
5710 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5711 continue;
5713 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5714 continue;
5716 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5719 iv_ca_delta_commit (data, ivs, *delta, true);
5720 cost = iv_ca_cost (ivs);
5721 if (n_ivs)
5722 *n_ivs = iv_ca_n_cands (ivs);
5723 iv_ca_delta_commit (data, ivs, *delta, false);
5725 return cost;
5728 /* Try narrowing set IVS by removing CAND. Return the cost of
5729 the new set and store the differences in DELTA. START is
5730 the candidate with which we start narrowing. */
5732 static comp_cost
5733 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5734 struct iv_cand *cand, struct iv_cand *start,
5735 struct iv_ca_delta **delta)
5737 unsigned i, ci;
5738 struct iv_use *use;
5739 struct cost_pair *old_cp, *new_cp, *cp;
5740 bitmap_iterator bi;
5741 struct iv_cand *cnd;
5742 comp_cost cost, best_cost, acost;
5744 *delta = NULL;
5745 for (i = 0; i < n_iv_uses (data); i++)
5747 use = iv_use (data, i);
5749 old_cp = iv_ca_cand_for_use (ivs, use);
5750 if (old_cp->cand != cand)
5751 continue;
5753 best_cost = iv_ca_cost (ivs);
5754 /* Start narrowing with START. */
5755 new_cp = get_use_iv_cost (data, use, start);
5757 if (data->consider_all_candidates)
5759 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5761 if (ci == cand->id || (start && ci == start->id))
5762 continue;
5764 cnd = iv_cand (data, ci);
5766 cp = get_use_iv_cost (data, use, cnd);
5767 if (!cp)
5768 continue;
5770 iv_ca_set_cp (data, ivs, use, cp);
5771 acost = iv_ca_cost (ivs);
5773 if (compare_costs (acost, best_cost) < 0)
5775 best_cost = acost;
5776 new_cp = cp;
5780 else
5782 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5784 if (ci == cand->id || (start && ci == start->id))
5785 continue;
5787 cnd = iv_cand (data, ci);
5789 cp = get_use_iv_cost (data, use, cnd);
5790 if (!cp)
5791 continue;
5793 iv_ca_set_cp (data, ivs, use, cp);
5794 acost = iv_ca_cost (ivs);
5796 if (compare_costs (acost, best_cost) < 0)
5798 best_cost = acost;
5799 new_cp = cp;
5803 /* Restore to old cp for use. */
5804 iv_ca_set_cp (data, ivs, use, old_cp);
5806 if (!new_cp)
5808 iv_ca_delta_free (delta);
5809 return infinite_cost;
5812 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5815 iv_ca_delta_commit (data, ivs, *delta, true);
5816 cost = iv_ca_cost (ivs);
5817 iv_ca_delta_commit (data, ivs, *delta, false);
5819 return cost;
5822 /* Try optimizing the set of candidates IVS by removing candidates other
5823 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5824 differences in DELTA. */
5826 static comp_cost
5827 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5828 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5830 bitmap_iterator bi;
5831 struct iv_ca_delta *act_delta, *best_delta;
5832 unsigned i;
5833 comp_cost best_cost, acost;
5834 struct iv_cand *cand;
5836 best_delta = NULL;
5837 best_cost = iv_ca_cost (ivs);
5839 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5841 cand = iv_cand (data, i);
5843 if (cand == except_cand)
5844 continue;
5846 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5848 if (compare_costs (acost, best_cost) < 0)
5850 best_cost = acost;
5851 iv_ca_delta_free (&best_delta);
5852 best_delta = act_delta;
5854 else
5855 iv_ca_delta_free (&act_delta);
5858 if (!best_delta)
5860 *delta = NULL;
5861 return best_cost;
5864 /* Recurse to possibly remove other unnecessary ivs. */
5865 iv_ca_delta_commit (data, ivs, best_delta, true);
5866 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5867 iv_ca_delta_commit (data, ivs, best_delta, false);
5868 *delta = iv_ca_delta_join (best_delta, *delta);
5869 return best_cost;
5872 /* Tries to extend the set IVS in the best possible way in order
5873 to express the USE. If ORIGINALP is true, prefer candidates from
5874 the original set of IVs, otherwise favor important candidates not
5875 based on any memory object. */
5877 static bool
5878 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5879 struct iv_use *use, bool originalp)
5881 comp_cost best_cost, act_cost;
5882 unsigned i;
5883 bitmap_iterator bi;
5884 struct iv_cand *cand;
5885 struct iv_ca_delta *best_delta = NULL, *act_delta;
5886 struct cost_pair *cp;
5888 iv_ca_add_use (data, ivs, use);
5889 best_cost = iv_ca_cost (ivs);
5890 cp = iv_ca_cand_for_use (ivs, use);
5891 if (cp)
5893 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5894 iv_ca_set_no_cp (data, ivs, use);
5897 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5898 first try important candidates not based on any memory object. Only if
5899 this fails, try the specific ones. Rationale -- in loops with many
5900 variables the best choice often is to use just one generic biv. If we
5901 added here many ivs specific to the uses, the optimization algorithm later
5902 would be likely to get stuck in a local minimum, thus causing us to create
5903 too many ivs. The approach from few ivs to more seems more likely to be
5904 successful -- starting from few ivs, replacing an expensive use by a
5905 specific iv should always be a win. */
5906 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5908 cand = iv_cand (data, i);
5910 if (originalp && cand->pos != IP_ORIGINAL)
5911 continue;
5913 if (!originalp && cand->iv->base_object != NULL_TREE)
5914 continue;
5916 if (iv_ca_cand_used_p (ivs, cand))
5917 continue;
5919 cp = get_use_iv_cost (data, use, cand);
5920 if (!cp)
5921 continue;
5923 iv_ca_set_cp (data, ivs, use, cp);
5924 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5925 true);
5926 iv_ca_set_no_cp (data, ivs, use);
5927 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5929 if (compare_costs (act_cost, best_cost) < 0)
5931 best_cost = act_cost;
5933 iv_ca_delta_free (&best_delta);
5934 best_delta = act_delta;
5936 else
5937 iv_ca_delta_free (&act_delta);
5940 if (infinite_cost_p (best_cost))
5942 for (i = 0; i < use->n_map_members; i++)
5944 cp = use->cost_map + i;
5945 cand = cp->cand;
5946 if (!cand)
5947 continue;
5949 /* Already tried this. */
5950 if (cand->important)
5952 if (originalp && cand->pos == IP_ORIGINAL)
5953 continue;
5954 if (!originalp && cand->iv->base_object == NULL_TREE)
5955 continue;
5958 if (iv_ca_cand_used_p (ivs, cand))
5959 continue;
5961 act_delta = NULL;
5962 iv_ca_set_cp (data, ivs, use, cp);
5963 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5964 iv_ca_set_no_cp (data, ivs, use);
5965 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5966 cp, act_delta);
5968 if (compare_costs (act_cost, best_cost) < 0)
5970 best_cost = act_cost;
5972 if (best_delta)
5973 iv_ca_delta_free (&best_delta);
5974 best_delta = act_delta;
5976 else
5977 iv_ca_delta_free (&act_delta);
5981 iv_ca_delta_commit (data, ivs, best_delta, true);
5982 iv_ca_delta_free (&best_delta);
5984 return !infinite_cost_p (best_cost);
5987 /* Finds an initial assignment of candidates to uses. */
5989 static struct iv_ca *
5990 get_initial_solution (struct ivopts_data *data, bool originalp)
5992 struct iv_ca *ivs = iv_ca_new (data);
5993 unsigned i;
5995 for (i = 0; i < n_iv_uses (data); i++)
5996 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5998 iv_ca_free (&ivs);
5999 return NULL;
6002 return ivs;
6005 /* Tries to improve the set of induction variables IVS. */
6007 static bool
6008 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
6010 unsigned i, n_ivs;
6011 comp_cost acost, best_cost = iv_ca_cost (ivs);
6012 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6013 struct iv_cand *cand;
6015 /* Try extending the set of induction variables by one. */
6016 for (i = 0; i < n_iv_cands (data); i++)
6018 cand = iv_cand (data, i);
6020 if (iv_ca_cand_used_p (ivs, cand))
6021 continue;
6023 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6024 if (!act_delta)
6025 continue;
6027 /* If we successfully added the candidate and the set is small enough,
6028 try optimizing it by removing other candidates. */
6029 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6031 iv_ca_delta_commit (data, ivs, act_delta, true);
6032 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6033 iv_ca_delta_commit (data, ivs, act_delta, false);
6034 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6037 if (compare_costs (acost, best_cost) < 0)
6039 best_cost = acost;
6040 iv_ca_delta_free (&best_delta);
6041 best_delta = act_delta;
6043 else
6044 iv_ca_delta_free (&act_delta);
6047 if (!best_delta)
6049 /* Try removing the candidates from the set instead. */
6050 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6052 /* Nothing more we can do. */
6053 if (!best_delta)
6054 return false;
6057 iv_ca_delta_commit (data, ivs, best_delta, true);
6058 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6059 iv_ca_delta_free (&best_delta);
6060 return true;
6063 /* Attempts to find the optimal set of induction variables. We use a simple
6064 greedy heuristic -- we try to replace at most one candidate in the selected
6065 solution and remove the unused ivs while this improves the cost. */
6067 static struct iv_ca *
6068 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6070 struct iv_ca *set;
6072 /* Get the initial solution. */
6073 set = get_initial_solution (data, originalp);
6074 if (!set)
6076 if (dump_file && (dump_flags & TDF_DETAILS))
6077 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6078 return NULL;
6081 if (dump_file && (dump_flags & TDF_DETAILS))
6083 fprintf (dump_file, "Initial set of candidates:\n");
6084 iv_ca_dump (data, dump_file, set);
6087 while (try_improve_iv_set (data, set))
6089 if (dump_file && (dump_flags & TDF_DETAILS))
6091 fprintf (dump_file, "Improved to:\n");
6092 iv_ca_dump (data, dump_file, set);
6096 return set;
6099 static struct iv_ca *
6100 find_optimal_iv_set (struct ivopts_data *data)
6102 unsigned i;
6103 struct iv_ca *set, *origset;
6104 struct iv_use *use;
6105 comp_cost cost, origcost;
6107 /* Determine the cost based on a strategy that starts with original IVs,
6108 then try again using a strategy that prefers candidates not based
6109 on any IVs. */
6110 origset = find_optimal_iv_set_1 (data, true);
6111 set = find_optimal_iv_set_1 (data, false);
6113 if (!origset && !set)
6114 return NULL;
6116 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6117 cost = set ? iv_ca_cost (set) : infinite_cost;
6119 if (dump_file && (dump_flags & TDF_DETAILS))
6121 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6122 origcost.cost, origcost.complexity);
6123 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6124 cost.cost, cost.complexity);
6127 /* Choose the one with the best cost. */
6128 if (compare_costs (origcost, cost) <= 0)
6130 if (set)
6131 iv_ca_free (&set);
6132 set = origset;
6134 else if (origset)
6135 iv_ca_free (&origset);
6137 for (i = 0; i < n_iv_uses (data); i++)
6139 use = iv_use (data, i);
6140 use->selected = iv_ca_cand_for_use (set, use)->cand;
6143 return set;
6146 /* Creates a new induction variable corresponding to CAND. */
6148 static void
6149 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6151 gimple_stmt_iterator incr_pos;
6152 tree base;
6153 bool after = false;
6155 if (!cand->iv)
6156 return;
6158 switch (cand->pos)
6160 case IP_NORMAL:
6161 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6162 break;
6164 case IP_END:
6165 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6166 after = true;
6167 break;
6169 case IP_AFTER_USE:
6170 after = true;
6171 /* fall through */
6172 case IP_BEFORE_USE:
6173 incr_pos = gsi_for_stmt (cand->incremented_at);
6174 break;
6176 case IP_ORIGINAL:
6177 /* Mark that the iv is preserved. */
6178 name_info (data, cand->var_before)->preserve_biv = true;
6179 name_info (data, cand->var_after)->preserve_biv = true;
6181 /* Rewrite the increment so that it uses var_before directly. */
6182 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6183 return;
6186 gimple_add_tmp_var (cand->var_before);
6188 base = unshare_expr (cand->iv->base);
6190 create_iv (base, unshare_expr (cand->iv->step),
6191 cand->var_before, data->current_loop,
6192 &incr_pos, after, &cand->var_before, &cand->var_after);
6195 /* Creates new induction variables described in SET. */
6197 static void
6198 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6200 unsigned i;
6201 struct iv_cand *cand;
6202 bitmap_iterator bi;
6204 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6206 cand = iv_cand (data, i);
6207 create_new_iv (data, cand);
6210 if (dump_file && (dump_flags & TDF_DETAILS))
6212 fprintf (dump_file, "\nSelected IV set: \n");
6213 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6215 cand = iv_cand (data, i);
6216 dump_cand (dump_file, cand);
6218 fprintf (dump_file, "\n");
6222 /* Rewrites USE (definition of iv used in a nonlinear expression)
6223 using candidate CAND. */
6225 static void
6226 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6227 struct iv_use *use, struct iv_cand *cand)
6229 tree comp;
6230 tree op, tgt;
6231 gimple_assign ass;
6232 gimple_stmt_iterator bsi;
6234 /* An important special case -- if we are asked to express value of
6235 the original iv by itself, just exit; there is no need to
6236 introduce a new computation (that might also need casting the
6237 variable to unsigned and back). */
6238 if (cand->pos == IP_ORIGINAL
6239 && cand->incremented_at == use->stmt)
6241 enum tree_code stmt_code;
6243 gcc_assert (is_gimple_assign (use->stmt));
6244 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6246 /* Check whether we may leave the computation unchanged.
6247 This is the case only if it does not rely on other
6248 computations in the loop -- otherwise, the computation
6249 we rely upon may be removed in remove_unused_ivs,
6250 thus leading to an ICE. */
6251 stmt_code = gimple_assign_rhs_code (use->stmt);
6252 if (stmt_code == PLUS_EXPR
6253 || stmt_code == MINUS_EXPR
6254 || stmt_code == POINTER_PLUS_EXPR)
6256 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6257 op = gimple_assign_rhs2 (use->stmt);
6258 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6259 op = gimple_assign_rhs1 (use->stmt);
6260 else
6261 op = NULL_TREE;
6263 else
6264 op = NULL_TREE;
6266 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6267 return;
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
          /* We can't allow re-allocating the stmt as it might be pointed
             to still.  */
          && (get_gimple_rhs_num_ops (TREE_CODE (comp))
              >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
        {
          duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
          /* As this isn't a plain copy we have to reset alignment
             information.  */
          if (SSA_NAME_PTR_INFO (comp))
            mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
        }
    }
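
  /* A PHI node cannot hold the replacement expression directly: emit an
     assignment to its result after the labels of its block and remove
     the PHI.  A plain assignment just gets its RHS replaced.  */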
  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}

/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref
   uses the iv value before the update, so the reordering transformation
   requires adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;


   Directly propagating t over to (1) will introduce an overlapping live
   range, thus increasing register pressure.  This peephole transforms it
   into:


   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;
*/

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;
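
  /* The update of VAR_AFTER and the conditional exit test must be the
     last two real statements of the block for the reordering to apply.  */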
  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statements for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;
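
  /* The statement immediately preceding the update must be the use
     itself: an assignment whose result is a plain SSA name.  */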
  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
                        reference_alias_ptr_type (*use->op_p),
                        iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
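
  /* A non-null BOUND means that induction variable elimination succeeded:
     the exit test can compare the candidate directly against BOUND.  For
     instance, a test such as "if (i < n)" may become "if (i' != bound)",
     with bound computed in the loop preheader.  */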
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gimple_cond cond_stmt = as_a <gimple_cond> (use->stmt);
      gimple_cond_set_lhs (cond_stmt, var);
      gimple_cond_set_code (cond_stmt, compare);
      gimple_cond_set_rhs (cond_stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}

/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        {
          bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

          tree def = info->iv->ssa_name;

          if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
            {
              imm_use_iterator imm_iter;
              use_operand_p use_p;
              gimple stmt;
              int count = 0;

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  /* We just want to determine whether to do nothing
                     (count == 0), to substitute the computed
                     expression into a single use of the SSA DEF by
                     itself (count == 1), or to use a debug temp
                     because the SSA DEF is used multiple times or as
                     part of a larger expression (count > 1).  */
                  count++;
                  if (gimple_debug_bind_get_value (stmt) != def)
                    count++;

                  if (count > 1)
                    BREAK_FROM_IMM_USE_STMT (imm_iter);
                }

              if (!count)
                continue;
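
              /* Pick the candidate that yields the simplest replacement
                 expression, preferring a matching step, a matching mode,
                 and a constant base, in that order of weight.  */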
              struct iv_use dummy_use;
              struct iv_cand *best_cand = NULL, *cand;
              unsigned i, best_pref = 0, cand_pref;

              memset (&dummy_use, 0, sizeof (dummy_use));
              dummy_use.iv = info->iv;
              for (i = 0; i < n_iv_uses (data) && i < 64; i++)
                {
                  cand = iv_use (data, i)->selected;
                  if (cand == best_cand)
                    continue;
                  cand_pref = operand_equal_p (cand->iv->step,
                                               info->iv->step, 0)
                              ? 4 : 0;
                  cand_pref
                    += TYPE_MODE (TREE_TYPE (cand->iv->base))
                       == TYPE_MODE (TREE_TYPE (info->iv->base))
                       ? 2 : 0;
                  cand_pref
                    += TREE_CODE (cand->iv->base) == INTEGER_CST
                       ? 1 : 0;
                  if (best_cand == NULL || best_pref < cand_pref)
                    {
                      best_cand = cand;
                      best_pref = cand_pref;
                    }
                }

              if (!best_cand)
                continue;

              tree comp = get_computation_at (data->current_loop,
                                              &dummy_use, best_cand,
                                              SSA_NAME_DEF_STMT (def));
              if (!comp)
                continue;
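
              /* If the SSA name is used more than once or inside a larger
                 expression, bind the computation to a DEBUG_EXPR_DECL
                 temporary so it is emitted only once.  */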
              if (count > 1)
                {
                  tree vexpr = make_node (DEBUG_EXPR_DECL);
                  DECL_ARTIFICIAL (vexpr) = 1;
                  TREE_TYPE (vexpr) = TREE_TYPE (comp);
                  if (SSA_NAME_VAR (def))
                    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
                  else
                    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
                  gimple_debug def_temp =
                    gimple_build_debug_bind (vexpr, comp, NULL);
                  gimple_stmt_iterator gsi;

                  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
                    gsi = gsi_after_labels (gimple_bb
                                            (SSA_NAME_DEF_STMT (def)));
                  else
                    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

                  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
                  comp = vexpr;
                }
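
              /* Point every debug bind that uses DEF at the replacement
                 value.  */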
              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    SET_USE (use_p, comp);

                  update_stmt (stmt);
                }
            }
        }
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}

/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for hash_map::traverse.  */

bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
                                     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();
  data->inv_expr_id = 0;
}

/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
}

/* Returns true if the loop body BODY, consisting of NUM_NODES blocks,
   contains a function call other than an inexpensive builtin.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }

  return false;
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }
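
  /* Precompute properties of the loop body used by the cost model, and
     renumber stmt uids so that statement order within a block can be
     compared cheaply.  */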
  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name, determine whether it behaves as an induction
     variable in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}